diff --git a/pkg/sql/colexec/colexecjoin/crossjoiner.eg.go b/pkg/sql/colexec/colexecjoin/crossjoiner.eg.go
index 85fd93c7462d..7514686a8d94 100644
--- a/pkg/sql/colexec/colexecjoin/crossjoiner.eg.go
+++ b/pkg/sql/colexec/colexecjoin/crossjoiner.eg.go
@@ -26,1386 +26,1299 @@ var (
 	_ = types.BoolFamily
 )
 
+// buildFromLeftBatch is the body of buildFromLeftInput that templates out the
+// fact whether the current left batch has a selection vector or not.
+// execgen:inline
+const _ = "template_buildFromLeftBatch"
+
 // buildFromLeftInput builds part of the output of a cross join that comes from
 // the vectors of the left input. The new output tuples are put starting at
 // index destStartIdx and will not exceed the capacity of the output batch. It
-// is assumed that setupBuilder has been called.
+// is assumed that setupLeftBuilder and prepareForNextLeftBatch have been
+// called.
 // The goal of this method is to repeat each tuple from the left input
-// leftNumRepeats times. For set-operation joins only first setOpLeftSrcIdx
-// tuples are built from.
+// leftNumRepeats times. Only the tuples in [curSrcStartIdx, leftSrcEndIdx) are
+// used from the current left batch.
 func (b *crossJoinerBase) buildFromLeftInput(ctx context.Context, destStartIdx int) {
-	var err error
 	currentBatch := b.builderState.left.currentBatch
-	if currentBatch == nil || b.builderState.left.curSrcStartIdx == currentBatch.Length() {
-		// We need to get the next batch to build from if it is the first one or
-		// we have fully processed the previous one.
-		currentBatch, err = b.left.tuples.Dequeue(ctx)
-		if err != nil {
-			colexecerror.InternalError(err)
-		}
-		b.builderState.left.currentBatch = currentBatch
-		b.builderState.left.curSrcStartIdx = 0
-		b.builderState.left.numRepeatsIdx = 0
-	}
-	initialBuilderState := b.builderState.left
 	b.left.unlimitedAllocator.PerformOperation(
 		b.output.ColVecs()[:len(b.left.types)],
 		func() {
-			leftNumRepeats := b.builderState.setup.leftNumRepeats
-			isSetOp := b.joinType.IsSetOpJoin()
-			outputCapacity := b.output.Capacity()
-			batchLength := currentBatch.Length()
-			for batchLength > 0 {
-				// Loop over every column.
-			LeftColLoop:
-				for colIdx := range b.left.types {
-					outStartIdx := destStartIdx
-					src := currentBatch.ColVec(colIdx)
-					srcNulls := src.Nulls()
-					out := b.output.ColVec(colIdx)
-					outNulls := out.Nulls()
-					switch b.left.canonicalTypeFamilies[colIdx] {
-					case types.BoolFamily:
-						switch b.left.types[colIdx].Width() {
-						case -1:
-						default:
-							srcCol := src.Bool()
-							outCol := out.Bool()
-							if leftNumRepeats == 1 {
-								// Loop over every tuple in the current batch.
-								for b.builderState.left.curSrcStartIdx < batchLength {
-									// Repeat each tuple one time.
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
-											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
+			if sel := currentBatch.Selection(); sel != nil {
+				{
+					// We'll be modifying the builder state as we go, but we'll need to be able
+					// to restore the state to initial for each column.
+					initialBuilderState := b.builderState.left
+					bs := &b.builderState.left
+					leftNumRepeats := b.builderState.setup.leftNumRepeats
+					leftSrcEndIdx := b.builderState.setup.leftSrcEndIdx
+					outputCapacity := b.output.Capacity()
+					var srcStartIdx int
+					// Loop over every column.
+					for colIdx := range b.left.types {
+						if colIdx > 0 {
+							// Restore the builder state so that this new column started off
+							// fresh.
+							*bs = initialBuilderState
+						}
+						outStartIdx := destStartIdx
+						src := currentBatch.ColVec(colIdx)
+						srcNulls := src.Nulls()
+						out := b.output.ColVec(colIdx)
+						outNulls := out.Nulls()
+						switch b.left.canonicalTypeFamilies[colIdx] {
+						case types.BoolFamily:
+							switch b.left.types[colIdx].Width() {
+							case -1:
+							default:
+								srcCol := src.Bool()
+								outCol := out.Bool()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											outCol.Set(outStartIdx, val)
 										}
-										b.builderState.left.setOpLeftSrcIdx++
-									}
-
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNull(outStartIdx)
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										outCol.Set(outStartIdx, val)
-									}
-									outStartIdx++
-									b.builderState.left.curSrcStartIdx++
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											return
-										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
-									}
-								}
-							} else {
-								// Loop over every tuple in the current batch.
-								for ; b.builderState.left.curSrcStartIdx < batchLength; b.builderState.left.curSrcStartIdx++ {
-									// Repeat each row leftNumRepeats times.
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									toAppend := leftNumRepeats - b.builderState.left.numRepeatsIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
 										}
-										b.builderState.left.setOpLeftSrcIdx += toAppend
 									}
-
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
-										outStartIdx += toAppend
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										for i := 0; i < toAppend; i++ {
+								}
+							}
+						case types.BytesFamily:
+							switch b.left.types[colIdx].Width() {
+							case -1:
+							default:
+								srcCol := src.Bytes()
+								outCol := out.Bytes()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
 											outCol.Set(outStartIdx, val)
-											outStartIdx++
 										}
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											b.builderState.left.numRepeatsIdx += toAppend
-											if b.builderState.left.numRepeatsIdx == leftNumRepeats {
-												// The current tuple has already been repeated
-												// the desired number of times, so we advance
-												// the source index.
-												b.builderState.left.curSrcStartIdx++
-												b.builderState.left.numRepeatsIdx = 0
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											return
 										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
 									}
-									// We fully processed the current tuple for the current
-									// column, and before moving on to the next one, we need
-									// to reset numRepeatsIdx (so that the next tuple would
-									// be repeated leftNumRepeats times).
-									b.builderState.left.numRepeatsIdx = 0
 								}
 							}
-						}
-					case types.BytesFamily:
-						switch b.left.types[colIdx].Width() {
-						case -1:
-						default:
-							srcCol := src.Bytes()
-							outCol := out.Bytes()
-							if leftNumRepeats == 1 {
-								// Loop over every tuple in the current batch.
-								for b.builderState.left.curSrcStartIdx < batchLength {
-									// Repeat each tuple one time.
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
-											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
+						case types.DecimalFamily:
+							switch b.left.types[colIdx].Width() {
+							case -1:
+							default:
+								srcCol := src.Decimal()
+								outCol := out.Decimal()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											outCol.Set(outStartIdx, val)
 										}
-										b.builderState.left.setOpLeftSrcIdx++
-									}
-
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNull(outStartIdx)
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										outCol.Set(outStartIdx, val)
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-									outStartIdx++
-									b.builderState.left.curSrcStartIdx++
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											return
-										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
-									}
-								}
-							} else {
-								// Loop over every tuple in the current batch.
-								for ; b.builderState.left.curSrcStartIdx < batchLength; b.builderState.left.curSrcStartIdx++ {
-									// Repeat each row leftNumRepeats times.
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									toAppend := leftNumRepeats - b.builderState.left.numRepeatsIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
-									}
-
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
 										}
-										b.builderState.left.setOpLeftSrcIdx += toAppend
 									}
-
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
-										outStartIdx += toAppend
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										for i := 0; i < toAppend; i++ {
+								}
+							}
+						case types.IntFamily:
+							switch b.left.types[colIdx].Width() {
+							case 16:
+								srcCol := src.Int16()
+								outCol := out.Int16()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
 											outCol.Set(outStartIdx, val)
-											outStartIdx++
 										}
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											b.builderState.left.numRepeatsIdx += toAppend
-											if b.builderState.left.numRepeatsIdx == leftNumRepeats {
-												// The current tuple has already been repeated
-												// the desired number of times, so we advance
-												// the source index.
-												b.builderState.left.curSrcStartIdx++
-												b.builderState.left.numRepeatsIdx = 0
-											}
-											return
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
 										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
-									}
-									// We fully processed the current tuple for the current
-									// column, and before moving on to the next one, we need
-									// to reset numRepeatsIdx (so that the next tuple would
-									// be repeated leftNumRepeats times).
-									b.builderState.left.numRepeatsIdx = 0
-								}
-							}
-						}
-					case types.DecimalFamily:
-						switch b.left.types[colIdx].Width() {
-						case -1:
-						default:
-							srcCol := src.Decimal()
-							outCol := out.Decimal()
-							if leftNumRepeats == 1 {
-								// Loop over every tuple in the current batch.
-								for b.builderState.left.curSrcStartIdx < batchLength {
-									// Repeat each tuple one time.
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
 										}
-										b.builderState.left.setOpLeftSrcIdx++
-									}
-
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNull(outStartIdx)
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										outCol.Set(outStartIdx, val)
-									}
-									outStartIdx++
-									b.builderState.left.curSrcStartIdx++
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											return
-										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
 									}
 								}
-							} else {
-								// Loop over every tuple in the current batch.
-								for ; b.builderState.left.curSrcStartIdx < batchLength; b.builderState.left.curSrcStartIdx++ {
-									// Repeat each row leftNumRepeats times.
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									toAppend := leftNumRepeats - b.builderState.left.numRepeatsIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+							case 32:
+								srcCol := src.Int32()
+								outCol := out.Int32()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											outCol.Set(outStartIdx, val)
+										}
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
 										}
-										b.builderState.left.setOpLeftSrcIdx += toAppend
 									}
-
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
-										outStartIdx += toAppend
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										for i := 0; i < toAppend; i++ {
+								}
+							case -1:
+							default:
+								srcCol := src.Int64()
+								outCol := out.Int64()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
 											outCol.Set(outStartIdx, val)
-											outStartIdx++
 										}
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											b.builderState.left.numRepeatsIdx += toAppend
-											if b.builderState.left.numRepeatsIdx == leftNumRepeats {
-												// The current tuple has already been repeated
-												// the desired number of times, so we advance
-												// the source index.
-												b.builderState.left.curSrcStartIdx++
-												b.builderState.left.numRepeatsIdx = 0
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											return
 										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
 									}
-									// We fully processed the current tuple for the current
-									// column, and before moving on to the next one, we need
-									// to reset numRepeatsIdx (so that the next tuple would
-									// be repeated leftNumRepeats times).
-									b.builderState.left.numRepeatsIdx = 0
 								}
 							}
-						}
-					case types.IntFamily:
-						switch b.left.types[colIdx].Width() {
-						case 16:
-							srcCol := src.Int16()
-							outCol := out.Int16()
-							if leftNumRepeats == 1 {
-								// Loop over every tuple in the current batch.
-								for b.builderState.left.curSrcStartIdx < batchLength {
-									// Repeat each tuple one time.
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
-											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
+						case types.FloatFamily:
+							switch b.left.types[colIdx].Width() {
+							case -1:
+							default:
+								srcCol := src.Float64()
+								outCol := out.Float64()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											outCol.Set(outStartIdx, val)
 										}
-										b.builderState.left.setOpLeftSrcIdx++
-									}
-
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNull(outStartIdx)
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										outCol.Set(outStartIdx, val)
-									}
-									outStartIdx++
-									b.builderState.left.curSrcStartIdx++
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											return
-										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
-									}
-								}
-							} else {
-								// Loop over every tuple in the current batch.
-								for ; b.builderState.left.curSrcStartIdx < batchLength; b.builderState.left.curSrcStartIdx++ {
-									// Repeat each row leftNumRepeats times.
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									toAppend := leftNumRepeats - b.builderState.left.numRepeatsIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
 										}
-										b.builderState.left.setOpLeftSrcIdx += toAppend
 									}
-
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
-										outStartIdx += toAppend
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										for i := 0; i < toAppend; i++ {
+								}
+							}
+						case types.TimestampTZFamily:
+							switch b.left.types[colIdx].Width() {
+							case -1:
+							default:
+								srcCol := src.Timestamp()
+								outCol := out.Timestamp()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
 											outCol.Set(outStartIdx, val)
-											outStartIdx++
 										}
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											b.builderState.left.numRepeatsIdx += toAppend
-											if b.builderState.left.numRepeatsIdx == leftNumRepeats {
-												// The current tuple has already been repeated
-												// the desired number of times, so we advance
-												// the source index.
-												b.builderState.left.curSrcStartIdx++
-												b.builderState.left.numRepeatsIdx = 0
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											return
 										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
 									}
-									// We fully processed the current tuple for the current
-									// column, and before moving on to the next one, we need
-									// to reset numRepeatsIdx (so that the next tuple would
-									// be repeated leftNumRepeats times).
-									b.builderState.left.numRepeatsIdx = 0
 								}
 							}
-						case 32:
-							srcCol := src.Int32()
-							outCol := out.Int32()
-							if leftNumRepeats == 1 {
-								// Loop over every tuple in the current batch.
-								for b.builderState.left.curSrcStartIdx < batchLength {
-									// Repeat each tuple one time.
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
-											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
+						case types.IntervalFamily:
+							switch b.left.types[colIdx].Width() {
+							case -1:
+							default:
+								srcCol := src.Interval()
+								outCol := out.Interval()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											outCol.Set(outStartIdx, val)
 										}
-										b.builderState.left.setOpLeftSrcIdx++
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNull(outStartIdx)
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										outCol.Set(outStartIdx, val)
-									}
-									outStartIdx++
-									b.builderState.left.curSrcStartIdx++
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											return
-										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
-									}
-								}
-							} else {
-								// Loop over every tuple in the current batch.
-								for ; b.builderState.left.curSrcStartIdx < batchLength; b.builderState.left.curSrcStartIdx++ {
-									// Repeat each row leftNumRepeats times.
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									toAppend := leftNumRepeats - b.builderState.left.numRepeatsIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
-									}
-
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
 										}
-										b.builderState.left.setOpLeftSrcIdx += toAppend
 									}
-
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
-										outStartIdx += toAppend
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										for i := 0; i < toAppend; i++ {
+								}
+							}
+						case types.JsonFamily:
+							switch b.left.types[colIdx].Width() {
+							case -1:
+							default:
+								srcCol := src.JSON()
+								outCol := out.JSON()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
 											outCol.Set(outStartIdx, val)
-											outStartIdx++
 										}
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											b.builderState.left.numRepeatsIdx += toAppend
-											if b.builderState.left.numRepeatsIdx == leftNumRepeats {
-												// The current tuple has already been repeated
-												// the desired number of times, so we advance
-												// the source index.
-												b.builderState.left.curSrcStartIdx++
-												b.builderState.left.numRepeatsIdx = 0
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											return
 										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
 									}
-									// We fully processed the current tuple for the current
-									// column, and before moving on to the next one, we need
-									// to reset numRepeatsIdx (so that the next tuple would
-									// be repeated leftNumRepeats times).
-									b.builderState.left.numRepeatsIdx = 0
 								}
 							}
-						case -1:
-						default:
-							srcCol := src.Int64()
-							outCol := out.Int64()
-							if leftNumRepeats == 1 {
-								// Loop over every tuple in the current batch.
-								for b.builderState.left.curSrcStartIdx < batchLength {
-									// Repeat each tuple one time.
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
-											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
+						case typeconv.DatumVecCanonicalTypeFamily:
+							switch b.left.types[colIdx].Width() {
+							case -1:
+							default:
+								srcCol := src.Datum()
+								outCol := out.Datum()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											outCol.Set(outStartIdx, val)
 										}
-										b.builderState.left.setOpLeftSrcIdx++
-									}
-
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNull(outStartIdx)
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										outCol.Set(outStartIdx, val)
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-									outStartIdx++
-									b.builderState.left.curSrcStartIdx++
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											return
-										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
-									}
-								}
-							} else {
-								// Loop over every tuple in the current batch.
-								for ; b.builderState.left.curSrcStartIdx < batchLength; b.builderState.left.curSrcStartIdx++ {
-									// Repeat each row leftNumRepeats times.
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									toAppend := leftNumRepeats - b.builderState.left.numRepeatsIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
-									}
-
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = sel[bs.curSrcStartIdx]
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
 										}
-										b.builderState.left.setOpLeftSrcIdx += toAppend
 									}
-
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
-										outStartIdx += toAppend
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										for i := 0; i < toAppend; i++ {
+								}
+							}
+						default:
+							colexecerror.InternalError(errors.AssertionFailedf("unhandled type %s", b.left.types[colIdx].String()))
+						}
+					}
+				}
+			} else {
+				{
+					var sel []int = nil
+					// Remove the unused warning.
+					_ = sel
+					// We'll be modifying the builder state as we go, but we'll need to be able
+					// to restore the state to initial for each column.
+					initialBuilderState := b.builderState.left
+					bs := &b.builderState.left
+					leftNumRepeats := b.builderState.setup.leftNumRepeats
+					leftSrcEndIdx := b.builderState.setup.leftSrcEndIdx
+					outputCapacity := b.output.Capacity()
+					var srcStartIdx int
+					// Loop over every column.
+					for colIdx := range b.left.types {
+						if colIdx > 0 {
+							// Restore the builder state so that this new column started off
+							// fresh.
+							*bs = initialBuilderState
+						}
+						outStartIdx := destStartIdx
+						src := currentBatch.ColVec(colIdx)
+						srcNulls := src.Nulls()
+						out := b.output.ColVec(colIdx)
+						outNulls := out.Nulls()
+						switch b.left.canonicalTypeFamilies[colIdx] {
+						case types.BoolFamily:
+							switch b.left.types[colIdx].Width() {
+							case -1:
+							default:
+								srcCol := src.Bool()
+								outCol := out.Bool()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
 											outCol.Set(outStartIdx, val)
-											outStartIdx++
 										}
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											b.builderState.left.numRepeatsIdx += toAppend
-											if b.builderState.left.numRepeatsIdx == leftNumRepeats {
-												// The current tuple has already been repeated
-												// the desired number of times, so we advance
-												// the source index.
-												b.builderState.left.curSrcStartIdx++
-												b.builderState.left.numRepeatsIdx = 0
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											return
 										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
 									}
-									// We fully processed the current tuple for the current
-									// column, and before moving on to the next one, we need
-									// to reset numRepeatsIdx (so that the next tuple would
-									// be repeated leftNumRepeats times).
-									b.builderState.left.numRepeatsIdx = 0
 								}
 							}
-						}
-					case types.FloatFamily:
-						switch b.left.types[colIdx].Width() {
-						case -1:
-						default:
-							srcCol := src.Float64()
-							outCol := out.Float64()
-							if leftNumRepeats == 1 {
-								// Loop over every tuple in the current batch.
-								for b.builderState.left.curSrcStartIdx < batchLength {
-									// Repeat each tuple one time.
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
-											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
+						case types.BytesFamily:
+							switch b.left.types[colIdx].Width() {
+							case -1:
+							default:
+								srcCol := src.Bytes()
+								outCol := out.Bytes()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											outCol.Set(outStartIdx, val)
 										}
-										b.builderState.left.setOpLeftSrcIdx++
-									}
-
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNull(outStartIdx)
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										outCol.Set(outStartIdx, val)
-									}
-									outStartIdx++
-									b.builderState.left.curSrcStartIdx++
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											return
-										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-								}
-							} else {
-								// Loop over every tuple in the current batch.
-								for ; b.builderState.left.curSrcStartIdx < batchLength; b.builderState.left.curSrcStartIdx++ {
-									// Repeat each row leftNumRepeats times.
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									toAppend := leftNumRepeats - b.builderState.left.numRepeatsIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
-									}
-
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
 										}
-										b.builderState.left.setOpLeftSrcIdx += toAppend
 									}
-
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
-										outStartIdx += toAppend
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										for i := 0; i < toAppend; i++ {
+								}
+							}
+						case types.DecimalFamily:
+							switch b.left.types[colIdx].Width() {
+							case -1:
+							default:
+								srcCol := src.Decimal()
+								outCol := out.Decimal()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
 											outCol.Set(outStartIdx, val)
-											outStartIdx++
 										}
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											b.builderState.left.numRepeatsIdx += toAppend
-											if b.builderState.left.numRepeatsIdx == leftNumRepeats {
-												// The current tuple has already been repeated
-												// the desired number of times, so we advance
-												// the source index.
-												b.builderState.left.curSrcStartIdx++
-												b.builderState.left.numRepeatsIdx = 0
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											return
 										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
 									}
-									// We fully processed the current tuple for the current
-									// column, and before moving on to the next one, we need
-									// to reset numRepeatsIdx (so that the next tuple would
-									// be repeated leftNumRepeats times).
-									b.builderState.left.numRepeatsIdx = 0
 								}
 							}
-						}
-					case types.TimestampTZFamily:
-						switch b.left.types[colIdx].Width() {
-						case -1:
-						default:
-							srcCol := src.Timestamp()
-							outCol := out.Timestamp()
-							if leftNumRepeats == 1 {
-								// Loop over every tuple in the current batch.
-								for b.builderState.left.curSrcStartIdx < batchLength {
-									// Repeat each tuple one time.
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
-											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
+						case types.IntFamily:
+							switch b.left.types[colIdx].Width() {
+							case 16:
+								srcCol := src.Int16()
+								outCol := out.Int16()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											outCol.Set(outStartIdx, val)
 										}
-										b.builderState.left.setOpLeftSrcIdx++
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNull(outStartIdx)
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										outCol.Set(outStartIdx, val)
-									}
-									outStartIdx++
-									b.builderState.left.curSrcStartIdx++
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											return
-										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
-									}
-								}
-							} else {
-								// Loop over every tuple in the current batch.
-								for ; b.builderState.left.curSrcStartIdx < batchLength; b.builderState.left.curSrcStartIdx++ {
-									// Repeat each row leftNumRepeats times.
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									toAppend := leftNumRepeats - b.builderState.left.numRepeatsIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
-									}
-
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
 										}
-										b.builderState.left.setOpLeftSrcIdx += toAppend
 									}
-
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
-										outStartIdx += toAppend
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										for i := 0; i < toAppend; i++ {
+								}
+							case 32:
+								srcCol := src.Int32()
+								outCol := out.Int32()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
 											outCol.Set(outStartIdx, val)
-											outStartIdx++
 										}
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											b.builderState.left.numRepeatsIdx += toAppend
-											if b.builderState.left.numRepeatsIdx == leftNumRepeats {
-												// The current tuple has already been repeated
-												// the desired number of times, so we advance
-												// the source index.
-												b.builderState.left.curSrcStartIdx++
-												b.builderState.left.numRepeatsIdx = 0
-											}
-											return
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
 										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
-									}
-									// We fully processed the current tuple for the current
-									// column, and before moving on to the next one, we need
-									// to reset numRepeatsIdx (so that the next tuple would
-									// be repeated leftNumRepeats times).
-									b.builderState.left.numRepeatsIdx = 0
-								}
-							}
-						}
-					case types.IntervalFamily:
-						switch b.left.types[colIdx].Width() {
-						case -1:
-						default:
-							srcCol := src.Interval()
-							outCol := out.Interval()
-							if leftNumRepeats == 1 {
-								// Loop over every tuple in the current batch.
-								for b.builderState.left.curSrcStartIdx < batchLength {
-									// Repeat each tuple one time.
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
 										}
-										b.builderState.left.setOpLeftSrcIdx++
-									}
-
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNull(outStartIdx)
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										outCol.Set(outStartIdx, val)
-									}
-									outStartIdx++
-									b.builderState.left.curSrcStartIdx++
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											return
-										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
 									}
 								}
-							} else {
-								// Loop over every tuple in the current batch.
-								for ; b.builderState.left.curSrcStartIdx < batchLength; b.builderState.left.curSrcStartIdx++ {
-									// Repeat each row leftNumRepeats times.
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									toAppend := leftNumRepeats - b.builderState.left.numRepeatsIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+							case -1:
+							default:
+								srcCol := src.Int64()
+								outCol := out.Int64()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											outCol.Set(outStartIdx, val)
+										}
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
 										}
-										b.builderState.left.setOpLeftSrcIdx += toAppend
 									}
-
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
-										outStartIdx += toAppend
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										for i := 0; i < toAppend; i++ {
+								}
+							}
+						case types.FloatFamily:
+							switch b.left.types[colIdx].Width() {
+							case -1:
+							default:
+								srcCol := src.Float64()
+								outCol := out.Float64()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
 											outCol.Set(outStartIdx, val)
-											outStartIdx++
 										}
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											b.builderState.left.numRepeatsIdx += toAppend
-											if b.builderState.left.numRepeatsIdx == leftNumRepeats {
-												// The current tuple has already been repeated
-												// the desired number of times, so we advance
-												// the source index.
-												b.builderState.left.curSrcStartIdx++
-												b.builderState.left.numRepeatsIdx = 0
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											return
 										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
 									}
-									// We fully processed the current tuple for the current
-									// column, and before moving on to the next one, we need
-									// to reset numRepeatsIdx (so that the next tuple would
-									// be repeated leftNumRepeats times).
-									b.builderState.left.numRepeatsIdx = 0
 								}
 							}
-						}
-					case types.JsonFamily:
-						switch b.left.types[colIdx].Width() {
-						case -1:
-						default:
-							srcCol := src.JSON()
-							outCol := out.JSON()
-							if leftNumRepeats == 1 {
-								// Loop over every tuple in the current batch.
-								for b.builderState.left.curSrcStartIdx < batchLength {
-									// Repeat each tuple one time.
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
-											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
+						case types.TimestampTZFamily:
+							switch b.left.types[colIdx].Width() {
+							case -1:
+							default:
+								srcCol := src.Timestamp()
+								outCol := out.Timestamp()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											outCol.Set(outStartIdx, val)
 										}
-										b.builderState.left.setOpLeftSrcIdx++
-									}
-
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNull(outStartIdx)
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										outCol.Set(outStartIdx, val)
-									}
-									outStartIdx++
-									b.builderState.left.curSrcStartIdx++
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											return
-										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
-									}
-								}
-							} else {
-								// Loop over every tuple in the current batch.
-								for ; b.builderState.left.curSrcStartIdx < batchLength; b.builderState.left.curSrcStartIdx++ {
-									// Repeat each row leftNumRepeats times.
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									toAppend := leftNumRepeats - b.builderState.left.numRepeatsIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
 										}
-										b.builderState.left.setOpLeftSrcIdx += toAppend
 									}
-
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
-										outStartIdx += toAppend
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										for i := 0; i < toAppend; i++ {
+								}
+							}
+						case types.IntervalFamily:
+							switch b.left.types[colIdx].Width() {
+							case -1:
+							default:
+								srcCol := src.Interval()
+								outCol := out.Interval()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
 											outCol.Set(outStartIdx, val)
-											outStartIdx++
 										}
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											b.builderState.left.numRepeatsIdx += toAppend
-											if b.builderState.left.numRepeatsIdx == leftNumRepeats {
-												// The current tuple has already been repeated
-												// the desired number of times, so we advance
-												// the source index.
-												b.builderState.left.curSrcStartIdx++
-												b.builderState.left.numRepeatsIdx = 0
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											return
 										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
 									}
-									// We fully processed the current tuple for the current
-									// column, and before moving on to the next one, we need
-									// to reset numRepeatsIdx (so that the next tuple would
-									// be repeated leftNumRepeats times).
-									b.builderState.left.numRepeatsIdx = 0
 								}
 							}
-						}
-					case typeconv.DatumVecCanonicalTypeFamily:
-						switch b.left.types[colIdx].Width() {
-						case -1:
-						default:
-							srcCol := src.Datum()
-							outCol := out.Datum()
-							if leftNumRepeats == 1 {
-								// Loop over every tuple in the current batch.
-								for b.builderState.left.curSrcStartIdx < batchLength {
-									// Repeat each tuple one time.
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
-											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
+						case types.JsonFamily:
+							switch b.left.types[colIdx].Width() {
+							case -1:
+							default:
+								srcCol := src.JSON()
+								outCol := out.JSON()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											outCol.Set(outStartIdx, val)
 										}
-										b.builderState.left.setOpLeftSrcIdx++
-									}
-
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNull(outStartIdx)
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										outCol.Set(outStartIdx, val)
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-									outStartIdx++
-									b.builderState.left.curSrcStartIdx++
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											return
-										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
-									}
-								}
-							} else {
-								// Loop over every tuple in the current batch.
-								for ; b.builderState.left.curSrcStartIdx < batchLength; b.builderState.left.curSrcStartIdx++ {
-									// Repeat each row leftNumRepeats times.
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									toAppend := leftNumRepeats - b.builderState.left.numRepeatsIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
-									}
-
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
 										}
-										b.builderState.left.setOpLeftSrcIdx += toAppend
 									}
-
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
-										outStartIdx += toAppend
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										for i := 0; i < toAppend; i++ {
+								}
+							}
+						case typeconv.DatumVecCanonicalTypeFamily:
+							switch b.left.types[colIdx].Width() {
+							case -1:
+							default:
+								srcCol := src.Datum()
+								outCol := out.Datum()
+								if leftNumRepeats == 1 {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each tuple one time.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNull(outStartIdx)
+										} else {
+											val := srcCol.Get(srcStartIdx)
 											outCol.Set(outStartIdx, val)
-											outStartIdx++
 										}
+										outStartIdx++
+										bs.curSrcStartIdx++
 									}
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											b.builderState.left.numRepeatsIdx += toAppend
-											if b.builderState.left.numRepeatsIdx == leftNumRepeats {
-												// The current tuple has already been repeated
-												// the desired number of times, so we advance
-												// the source index.
-												b.builderState.left.curSrcStartIdx++
-												b.builderState.left.numRepeatsIdx = 0
+								} else {
+									// Loop over every tuple in the current batch.
+									for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+										// Repeat each row leftNumRepeats times.
+										_ = true
+										srcStartIdx = bs.curSrcStartIdx
+										toAppend := leftNumRepeats - bs.numRepeatsIdx
+										if outStartIdx+toAppend > outputCapacity {
+											// We don't have enough space to repeat the current
+											// value the required number of times, so we'll have
+											// to continue from here on the next call.
+											toAppend = outputCapacity - outStartIdx
+											bs.numRepeatsIdx += toAppend
+										} else {
+											// We fully processed the current tuple for the
+											// current column, and before moving on to the next
+											// one, we need to reset numRepeatsIdx (so that the
+											// next tuple would be repeated leftNumRepeats
+											// times).
+											bs.curSrcStartIdx++
+											bs.numRepeatsIdx = 0
+										}
+										if srcNulls.NullAt(srcStartIdx) {
+											outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+											outStartIdx += toAppend
+										} else {
+											val := srcCol.Get(srcStartIdx)
+											for i := 0; i < toAppend; i++ {
+												outCol.Set(outStartIdx, val)
+												outStartIdx++
 											}
-											return
 										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
 									}
-									// We fully processed the current tuple for the current
-									// column, and before moving on to the next one, we need
-									// to reset numRepeatsIdx (so that the next tuple would
-									// be repeated leftNumRepeats times).
-									b.builderState.left.numRepeatsIdx = 0
 								}
 							}
+						default:
+							colexecerror.InternalError(errors.AssertionFailedf("unhandled type %s", b.left.types[colIdx].String()))
 						}
-					default:
-						colexecerror.InternalError(errors.AssertionFailedf("unhandled type %s", b.left.types[colIdx].String()))
 					}
-					if colIdx == len(b.left.types)-1 {
-						// We have appended some tuples into the output batch from the current
-						// batch (the latter is now fully processed), so we need to adjust
-						// destStartIdx accordingly for the next batch.
-						destStartIdx = outStartIdx
-					} else {
-						b.builderState.left = initialBuilderState
-					}
-				}
-				// We have processed all tuples in the current batch from the
-				// buffered group, so we need to Dequeue the next one.
-				currentBatch, err = b.left.tuples.Dequeue(ctx)
-				if err != nil {
-					colexecerror.InternalError(err)
 				}
-				b.builderState.left.currentBatch = currentBatch
-				batchLength = currentBatch.Length()
-				// We have transitioned to building from a new batch, so we
-				// need to update the builder state to build from the beginning
-				// of the new batch.
-				b.builderState.left.curSrcStartIdx = 0
-				b.builderState.left.numRepeatsIdx = 0
-				// We also need to update 'initialBuilderState' so that the
-				// builder state gets reset correctly in-between different
-				// columns in the loop above.
-				initialBuilderState = b.builderState.left
 			}
 		},
 	)
@@ -1414,30 +1327,34 @@ func (b *crossJoinerBase) buildFromLeftInput(ctx context.Context, destStartIdx i
 // buildFromRightInput builds part of the output of a cross join that comes from
 // the vectors of the right input. The new output tuples are put starting at
 // index destStartIdx and will not exceed the capacity of the output batch. It
-// is assumed that setupBuilder has been called.
+// is assumed that the right input has been fully consumed and is stored in
+// b.rightTuples spilling queue.
 // The goal of this method is to repeat all tuples from the right input
 // rightNumRepeats times (i.e. repeating the whole list of tuples at once).
 func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx int) {
 	var err error
+	bs := &b.builderState.right
+	rightNumRepeats := b.builderState.setup.rightNumRepeats
 	b.right.unlimitedAllocator.PerformOperation(
 		b.output.ColVecs()[b.builderState.rightColOffset:],
 		func() {
 			outStartIdx := destStartIdx
 			outputCapacity := b.output.Capacity()
-			// Repeat the buffered tuples rightNumRepeats times.
-			for ; b.builderState.right.numRepeatsIdx < b.builderState.setup.rightNumRepeats; b.builderState.right.numRepeatsIdx++ {
-				currentBatch := b.builderState.right.currentBatch
+			// Repeat the buffered tuples rightNumRepeats times until we fill
+			// the output capacity.
+			for ; outStartIdx < outputCapacity && bs.numRepeatsIdx < rightNumRepeats; bs.numRepeatsIdx++ {
+				currentBatch := bs.currentBatch
 				if currentBatch == nil {
-					currentBatch, err = b.right.tuples.Dequeue(ctx)
+					currentBatch, err = b.rightTuples.Dequeue(ctx)
 					if err != nil {
 						colexecerror.InternalError(err)
 					}
-					b.builderState.right.currentBatch = currentBatch
-					b.builderState.right.curSrcStartIdx = 0
+					bs.currentBatch = currentBatch
+					bs.curSrcStartIdx = 0
 				}
 				batchLength := currentBatch.Length()
 				for batchLength > 0 {
-					toAppend := batchLength - b.builderState.right.curSrcStartIdx
+					toAppend := batchLength - bs.curSrcStartIdx
 					if outStartIdx+toAppend > outputCapacity {
 						toAppend = outputCapacity - outStartIdx
 					}
@@ -1459,10 +1376,10 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 								// Optimization in the case that group length is 1, use assign
 								// instead of copy.
 								if toAppend == 1 {
-									if srcNulls.NullAt(b.builderState.right.curSrcStartIdx) {
+									if srcNulls.NullAt(bs.curSrcStartIdx) {
 										outNulls.SetNull(outStartIdx)
 									} else {
-										v := srcCol.Get(b.builderState.right.curSrcStartIdx)
+										v := srcCol.Get(bs.curSrcStartIdx)
 										outCol.Set(outStartIdx, v)
 									}
 								} else {
@@ -1470,8 +1387,8 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 										coldata.SliceArgs{
 											Src:         src,
 											DestIdx:     outStartIdx,
-											SrcStartIdx: b.builderState.right.curSrcStartIdx,
-											SrcEndIdx:   b.builderState.right.curSrcStartIdx + toAppend,
+											SrcStartIdx: bs.curSrcStartIdx,
+											SrcEndIdx:   bs.curSrcStartIdx + toAppend,
 										},
 									)
 								}
@@ -1486,10 +1403,10 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 								// Optimization in the case that group length is 1, use assign
 								// instead of copy.
 								if toAppend == 1 {
-									if srcNulls.NullAt(b.builderState.right.curSrcStartIdx) {
+									if srcNulls.NullAt(bs.curSrcStartIdx) {
 										outNulls.SetNull(outStartIdx)
 									} else {
-										v := srcCol.Get(b.builderState.right.curSrcStartIdx)
+										v := srcCol.Get(bs.curSrcStartIdx)
 										outCol.Set(outStartIdx, v)
 									}
 								} else {
@@ -1497,8 +1414,8 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 										coldata.SliceArgs{
 											Src:         src,
 											DestIdx:     outStartIdx,
-											SrcStartIdx: b.builderState.right.curSrcStartIdx,
-											SrcEndIdx:   b.builderState.right.curSrcStartIdx + toAppend,
+											SrcStartIdx: bs.curSrcStartIdx,
+											SrcEndIdx:   bs.curSrcStartIdx + toAppend,
 										},
 									)
 								}
@@ -1513,10 +1430,10 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 								// Optimization in the case that group length is 1, use assign
 								// instead of copy.
 								if toAppend == 1 {
-									if srcNulls.NullAt(b.builderState.right.curSrcStartIdx) {
+									if srcNulls.NullAt(bs.curSrcStartIdx) {
 										outNulls.SetNull(outStartIdx)
 									} else {
-										v := srcCol.Get(b.builderState.right.curSrcStartIdx)
+										v := srcCol.Get(bs.curSrcStartIdx)
 										outCol.Set(outStartIdx, v)
 									}
 								} else {
@@ -1524,8 +1441,8 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 										coldata.SliceArgs{
 											Src:         src,
 											DestIdx:     outStartIdx,
-											SrcStartIdx: b.builderState.right.curSrcStartIdx,
-											SrcEndIdx:   b.builderState.right.curSrcStartIdx + toAppend,
+											SrcStartIdx: bs.curSrcStartIdx,
+											SrcEndIdx:   bs.curSrcStartIdx + toAppend,
 										},
 									)
 								}
@@ -1539,10 +1456,10 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 								// Optimization in the case that group length is 1, use assign
 								// instead of copy.
 								if toAppend == 1 {
-									if srcNulls.NullAt(b.builderState.right.curSrcStartIdx) {
+									if srcNulls.NullAt(bs.curSrcStartIdx) {
 										outNulls.SetNull(outStartIdx)
 									} else {
-										v := srcCol.Get(b.builderState.right.curSrcStartIdx)
+										v := srcCol.Get(bs.curSrcStartIdx)
 										outCol.Set(outStartIdx, v)
 									}
 								} else {
@@ -1550,8 +1467,8 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 										coldata.SliceArgs{
 											Src:         src,
 											DestIdx:     outStartIdx,
-											SrcStartIdx: b.builderState.right.curSrcStartIdx,
-											SrcEndIdx:   b.builderState.right.curSrcStartIdx + toAppend,
+											SrcStartIdx: bs.curSrcStartIdx,
+											SrcEndIdx:   bs.curSrcStartIdx + toAppend,
 										},
 									)
 								}
@@ -1562,10 +1479,10 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 								// Optimization in the case that group length is 1, use assign
 								// instead of copy.
 								if toAppend == 1 {
-									if srcNulls.NullAt(b.builderState.right.curSrcStartIdx) {
+									if srcNulls.NullAt(bs.curSrcStartIdx) {
 										outNulls.SetNull(outStartIdx)
 									} else {
-										v := srcCol.Get(b.builderState.right.curSrcStartIdx)
+										v := srcCol.Get(bs.curSrcStartIdx)
 										outCol.Set(outStartIdx, v)
 									}
 								} else {
@@ -1573,8 +1490,8 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 										coldata.SliceArgs{
 											Src:         src,
 											DestIdx:     outStartIdx,
-											SrcStartIdx: b.builderState.right.curSrcStartIdx,
-											SrcEndIdx:   b.builderState.right.curSrcStartIdx + toAppend,
+											SrcStartIdx: bs.curSrcStartIdx,
+											SrcEndIdx:   bs.curSrcStartIdx + toAppend,
 										},
 									)
 								}
@@ -1586,10 +1503,10 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 								// Optimization in the case that group length is 1, use assign
 								// instead of copy.
 								if toAppend == 1 {
-									if srcNulls.NullAt(b.builderState.right.curSrcStartIdx) {
+									if srcNulls.NullAt(bs.curSrcStartIdx) {
 										outNulls.SetNull(outStartIdx)
 									} else {
-										v := srcCol.Get(b.builderState.right.curSrcStartIdx)
+										v := srcCol.Get(bs.curSrcStartIdx)
 										outCol.Set(outStartIdx, v)
 									}
 								} else {
@@ -1597,8 +1514,8 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 										coldata.SliceArgs{
 											Src:         src,
 											DestIdx:     outStartIdx,
-											SrcStartIdx: b.builderState.right.curSrcStartIdx,
-											SrcEndIdx:   b.builderState.right.curSrcStartIdx + toAppend,
+											SrcStartIdx: bs.curSrcStartIdx,
+											SrcEndIdx:   bs.curSrcStartIdx + toAppend,
 										},
 									)
 								}
@@ -1613,10 +1530,10 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 								// Optimization in the case that group length is 1, use assign
 								// instead of copy.
 								if toAppend == 1 {
-									if srcNulls.NullAt(b.builderState.right.curSrcStartIdx) {
+									if srcNulls.NullAt(bs.curSrcStartIdx) {
 										outNulls.SetNull(outStartIdx)
 									} else {
-										v := srcCol.Get(b.builderState.right.curSrcStartIdx)
+										v := srcCol.Get(bs.curSrcStartIdx)
 										outCol.Set(outStartIdx, v)
 									}
 								} else {
@@ -1624,8 +1541,8 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 										coldata.SliceArgs{
 											Src:         src,
 											DestIdx:     outStartIdx,
-											SrcStartIdx: b.builderState.right.curSrcStartIdx,
-											SrcEndIdx:   b.builderState.right.curSrcStartIdx + toAppend,
+											SrcStartIdx: bs.curSrcStartIdx,
+											SrcEndIdx:   bs.curSrcStartIdx + toAppend,
 										},
 									)
 								}
@@ -1640,10 +1557,10 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 								// Optimization in the case that group length is 1, use assign
 								// instead of copy.
 								if toAppend == 1 {
-									if srcNulls.NullAt(b.builderState.right.curSrcStartIdx) {
+									if srcNulls.NullAt(bs.curSrcStartIdx) {
 										outNulls.SetNull(outStartIdx)
 									} else {
-										v := srcCol.Get(b.builderState.right.curSrcStartIdx)
+										v := srcCol.Get(bs.curSrcStartIdx)
 										outCol.Set(outStartIdx, v)
 									}
 								} else {
@@ -1651,8 +1568,8 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 										coldata.SliceArgs{
 											Src:         src,
 											DestIdx:     outStartIdx,
-											SrcStartIdx: b.builderState.right.curSrcStartIdx,
-											SrcEndIdx:   b.builderState.right.curSrcStartIdx + toAppend,
+											SrcStartIdx: bs.curSrcStartIdx,
+											SrcEndIdx:   bs.curSrcStartIdx + toAppend,
 										},
 									)
 								}
@@ -1667,10 +1584,10 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 								// Optimization in the case that group length is 1, use assign
 								// instead of copy.
 								if toAppend == 1 {
-									if srcNulls.NullAt(b.builderState.right.curSrcStartIdx) {
+									if srcNulls.NullAt(bs.curSrcStartIdx) {
 										outNulls.SetNull(outStartIdx)
 									} else {
-										v := srcCol.Get(b.builderState.right.curSrcStartIdx)
+										v := srcCol.Get(bs.curSrcStartIdx)
 										outCol.Set(outStartIdx, v)
 									}
 								} else {
@@ -1678,8 +1595,8 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 										coldata.SliceArgs{
 											Src:         src,
 											DestIdx:     outStartIdx,
-											SrcStartIdx: b.builderState.right.curSrcStartIdx,
-											SrcEndIdx:   b.builderState.right.curSrcStartIdx + toAppend,
+											SrcStartIdx: bs.curSrcStartIdx,
+											SrcEndIdx:   bs.curSrcStartIdx + toAppend,
 										},
 									)
 								}
@@ -1694,10 +1611,10 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 								// Optimization in the case that group length is 1, use assign
 								// instead of copy.
 								if toAppend == 1 {
-									if srcNulls.NullAt(b.builderState.right.curSrcStartIdx) {
+									if srcNulls.NullAt(bs.curSrcStartIdx) {
 										outNulls.SetNull(outStartIdx)
 									} else {
-										v := srcCol.Get(b.builderState.right.curSrcStartIdx)
+										v := srcCol.Get(bs.curSrcStartIdx)
 										outCol.Set(outStartIdx, v)
 									}
 								} else {
@@ -1705,8 +1622,8 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 										coldata.SliceArgs{
 											Src:         src,
 											DestIdx:     outStartIdx,
-											SrcStartIdx: b.builderState.right.curSrcStartIdx,
-											SrcEndIdx:   b.builderState.right.curSrcStartIdx + toAppend,
+											SrcStartIdx: bs.curSrcStartIdx,
+											SrcEndIdx:   bs.curSrcStartIdx + toAppend,
 										},
 									)
 								}
@@ -1721,10 +1638,10 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 								// Optimization in the case that group length is 1, use assign
 								// instead of copy.
 								if toAppend == 1 {
-									if srcNulls.NullAt(b.builderState.right.curSrcStartIdx) {
+									if srcNulls.NullAt(bs.curSrcStartIdx) {
 										outNulls.SetNull(outStartIdx)
 									} else {
-										v := srcCol.Get(b.builderState.right.curSrcStartIdx)
+										v := srcCol.Get(bs.curSrcStartIdx)
 										outCol.Set(outStartIdx, v)
 									}
 								} else {
@@ -1732,8 +1649,8 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 										coldata.SliceArgs{
 											Src:         src,
 											DestIdx:     outStartIdx,
-											SrcStartIdx: b.builderState.right.curSrcStartIdx,
-											SrcEndIdx:   b.builderState.right.curSrcStartIdx + toAppend,
+											SrcStartIdx: bs.curSrcStartIdx,
+											SrcEndIdx:   bs.curSrcStartIdx + toAppend,
 										},
 									)
 								}
@@ -1744,34 +1661,38 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 					}
 					outStartIdx += toAppend
 
-					if toAppend < batchLength-b.builderState.right.curSrcStartIdx {
+					if toAppend < batchLength-bs.curSrcStartIdx {
 						// If we haven't materialized all the tuples from the
 						// batch, then we are ready to emit the output batch.
-						b.builderState.right.curSrcStartIdx += toAppend
+						bs.curSrcStartIdx += toAppend
 						return
 					}
 					// We have fully processed the current batch, so we need to
 					// get the next one.
-					currentBatch, err = b.right.tuples.Dequeue(ctx)
+					currentBatch, err = b.rightTuples.Dequeue(ctx)
 					if err != nil {
 						colexecerror.InternalError(err)
 					}
-					b.builderState.right.currentBatch = currentBatch
+					bs.currentBatch = currentBatch
 					batchLength = currentBatch.Length()
-					b.builderState.right.curSrcStartIdx = 0
-
-					if outStartIdx == outputCapacity {
-						// We reached the capacity of the output batch, so we
-						// can emit it.
-						return
-					}
+					bs.curSrcStartIdx = 0
 				}
 				// We have fully processed all the batches from the right side,
 				// so we need to Rewind the queue.
-				if err := b.right.tuples.Rewind(); err != nil {
+				if err := b.rightTuples.Rewind(); err != nil {
 					colexecerror.InternalError(err)
 				}
-				b.builderState.right.currentBatch = nil
+				bs.currentBatch = nil
 			}
 		})
 }
+
+// buildFromLeftBatch is the body of buildFromLeftInput that templates out the
+// fact whether the current left batch has a selection vector or not.
+// execgen:inline
+const _ = "inlined_buildFromLeftBatch_true"
+
+// buildFromLeftBatch is the body of buildFromLeftInput that templates out the
+// fact whether the current left batch has a selection vector or not.
+// execgen:inline
+const _ = "inlined_buildFromLeftBatch_false"
diff --git a/pkg/sql/colexec/colexecjoin/crossjoiner.go b/pkg/sql/colexec/colexecjoin/crossjoiner.go
index 01da98daa21d..7029d9560378 100644
--- a/pkg/sql/colexec/colexecjoin/crossjoiner.go
+++ b/pkg/sql/colexec/colexecjoin/crossjoiner.go
@@ -63,16 +63,18 @@ type crossJoiner struct {
 	*joinHelper
 
 	unlimitedAllocator    *colmem.Allocator
-	inputsConsumed        bool
+	rightInputConsumed    bool
 	outputTypes           []*types.T
 	maxOutputBatchMemSize int64
-	numTotalOutputTuples  int
-	numAlreadyEmitted     int
 	// isLeftAllNulls and isRightAllNulls indicate whether the output vectors
 	// corresponding to the left and right inputs, respectively, should consist
 	// only of NULL values. This is the case when we have right or left,
-	// respectively, unmatched tuples. Note that only one can be set to true.
+	// respectively, unmatched tuples.
 	isLeftAllNulls, isRightAllNulls bool
+	// done indicates that the cross joiner has fully built its output and
+	// closed the spilling queue. Once set to true, only zero-length batches are
+	// emitted.
+	done bool
 }
 
 var _ colexecop.ClosableOperator = &crossJoiner{}
@@ -88,17 +90,21 @@ func (c *crossJoiner) Init(ctx context.Context) {
 }
 
 func (c *crossJoiner) Next() coldata.Batch {
-	if !c.inputsConsumed {
-		c.consumeInputs(c.Ctx)
+	if c.done {
+		return coldata.ZeroBatch
+	}
+	if !c.rightInputConsumed {
+		c.consumeRightInput(c.Ctx)
 		c.setupForBuilding()
 	}
-	if c.numTotalOutputTuples == c.numAlreadyEmitted {
+	willEmit := c.willEmit()
+	if willEmit == 0 {
 		if err := c.Close(); err != nil {
 			colexecerror.InternalError(err)
 		}
+		c.done = true
 		return coldata.ZeroBatch
 	}
-	willEmit := c.numTotalOutputTuples - c.numAlreadyEmitted
 	c.output, _ = c.unlimitedAllocator.ResetMaybeReallocate(
 		c.outputTypes, c.output, willEmit, c.maxOutputBatchMemSize,
 	)
@@ -120,112 +126,157 @@ func (c *crossJoiner) Next() coldata.Batch {
 		}
 	}
 	c.output.SetLength(willEmit)
-	c.numAlreadyEmitted += willEmit
+	c.builderState.numEmittedCurLeftBatch += willEmit
+	c.builderState.numEmittedTotal += willEmit
 	return c.output
 }
 
-// consumeInputs determines the kind of information the cross joiner needs from
-// its inputs (in some cases, we don't need to buffer all input tuples) and
-// consumes the inputs accordingly.
-func (c *crossJoiner) consumeInputs(ctx context.Context) {
-	c.inputsConsumed = true
-	var needLeftTuples bool
+// readNextLeftBatch fetches the next batch from the left input, prepares the
+// builder for it (assuming that all rows in the batch contribute to the cross
+// product), and returns the length of the batch.
+func (c *crossJoiner) readNextLeftBatch() int {
+	leftBatch := c.inputOne.Next()
+	c.prepareForNextLeftBatch(leftBatch, 0 /* startIdx */, leftBatch.Length())
+	return leftBatch.Length()
+}
+
+// consumeRightInput determines the kind of information the cross joiner needs
+// from its right input (in some cases, we don't need to buffer all tuples from
+// the right) and consumes the right input accordingly. It also checks whether
+// we need any tuples from the left and possibly reads a single batch, depending
+// on the join type.
+func (c *crossJoiner) consumeRightInput(ctx context.Context) {
+	c.rightInputConsumed = true
 	var needRightTuples, needOnlyNumRightTuples bool
 	switch c.joinType {
 	case descpb.InnerJoin, descpb.LeftOuterJoin, descpb.RightOuterJoin, descpb.FullOuterJoin:
-		needLeftTuples = true
+		c.needLeftTuples = true
 		needRightTuples = true
 	case descpb.LeftSemiJoin:
 		// With LEFT SEMI join we only need to know whether the right input is
 		// empty or not.
-		c.right.numTuples = c.inputTwo.Next().Length()
-		needLeftTuples = c.right.numTuples != 0
+		c.numRightTuples = c.inputTwo.Next().Length()
+		c.needLeftTuples = c.numRightTuples != 0
 	case descpb.RightSemiJoin:
 		// With RIGHT SEMI join we only need to know whether the left input is
 		// empty or not.
-		c.left.numTuples = c.inputOne.Next().Length()
-		needRightTuples = c.left.numTuples != 0
+		needRightTuples = c.readNextLeftBatch() != 0
 	case descpb.LeftAntiJoin:
 		// With LEFT ANTI join we only need to know whether the right input is
 		// empty or not.
-		c.right.numTuples = c.inputTwo.Next().Length()
-		needLeftTuples = c.right.numTuples == 0
+		c.numRightTuples = c.inputTwo.Next().Length()
+		c.needLeftTuples = c.numRightTuples == 0
 	case descpb.RightAntiJoin:
 		// With RIGHT ANTI join we only need to know whether the left input is
 		// empty or not.
-		c.left.numTuples = c.inputOne.Next().Length()
-		needRightTuples = c.left.numTuples == 0
+		needRightTuples = c.readNextLeftBatch() == 0
 	case descpb.IntersectAllJoin, descpb.ExceptAllJoin:
 		// With set-operation joins we only need the number of tuples from the
 		// right input.
-		needLeftTuples = true
+		c.needLeftTuples = true
 		needOnlyNumRightTuples = true
 	default:
 		colexecerror.InternalError(errors.AssertionFailedf("unexpected join type %s", c.joinType.String()))
 	}
-	if needRightTuples && needOnlyNumRightTuples {
-		colexecerror.InternalError(errors.AssertionFailedf("both needRightTuples and needOnlyNumRightTuples are true"))
-	}
-
-	if needLeftTuples {
-		for {
-			batch := c.inputOne.Next()
-			c.left.tuples.Enqueue(ctx, batch)
-			if batch.Length() == 0 {
-				break
-			}
-			c.left.numTuples += batch.Length()
-		}
-	}
-	if needRightTuples {
+	if needRightTuples || needOnlyNumRightTuples {
 		for {
 			batch := c.inputTwo.Next()
-			c.right.tuples.Enqueue(ctx, batch)
-			if batch.Length() == 0 {
-				break
+			if needRightTuples {
+				c.rightTuples.Enqueue(ctx, batch)
 			}
-			c.right.numTuples += batch.Length()
-		}
-	}
-	if needOnlyNumRightTuples {
-		for {
-			batch := c.inputTwo.Next()
 			if batch.Length() == 0 {
 				break
 			}
-			c.right.numTuples += batch.Length()
+			c.numRightTuples += batch.Length()
 		}
 	}
 }
 
+// setupForBuilding prepares the cross joiner to build the output. This method
+// must be called after the right input has been fully "processed" (which might
+// mean it wasn't fully read, depending on the join type).
 func (c *crossJoiner) setupForBuilding() {
-	c.numTotalOutputTuples = c.calculateOutputCount()
-
 	switch c.joinType {
 	case descpb.LeftOuterJoin:
-		c.isRightAllNulls = c.right.numTuples == 0
+		c.isRightAllNulls = c.numRightTuples == 0
 	case descpb.RightOuterJoin:
-		c.isLeftAllNulls = c.left.numTuples == 0
+		c.isLeftAllNulls = c.readNextLeftBatch() == 0
 	case descpb.FullOuterJoin:
-		c.isLeftAllNulls = c.left.numTuples == 0
-		c.isRightAllNulls = c.right.numTuples == 0
-	}
-	// In order for buildFrom*Input methods to work in the unmatched cases, we
-	// "lie" that there is a single tuple on the opposite side which results in
-	// the builder methods repeating the tuples only once, and that's exactly
-	// what we want.
-	if c.isLeftAllNulls {
-		c.left.numTuples = 1
+		c.isLeftAllNulls = c.readNextLeftBatch() == 0
+		c.isRightAllNulls = c.numRightTuples == 0
+	case descpb.ExceptAllJoin:
+		// For EXCEPT ALL joins we build # left tuples - # right tuples output
+		// rows (if positive), so we have to discard first numRightTuples rows
+		// from the left.
+		for c.numRightTuples > 0 {
+			leftBatch := c.inputOne.Next()
+			c.builderState.left.currentBatch = leftBatch
+			if leftBatch.Length() == 0 {
+				break
+			} else if leftBatch.Length() > c.numRightTuples {
+				// The current left batch is the first one that contains tuples
+				// without a "match".
+				c.prepareForNextLeftBatch(leftBatch, c.numRightTuples, leftBatch.Length())
+				break
+			}
+			c.numRightTuples -= leftBatch.Length()
+		}
 	}
+	// In order for canEmit method to work in the unmatched cases, we "lie"
+	// that there is a single tuple on the right side which results in the
+	// builder method repeating the tuples only once, and that's exactly what we
+	// want.
 	if c.isRightAllNulls {
-		c.right.numTuples = 1
+		c.numRightTuples = 1
 	}
-	c.setupBuilder()
-	if c.isLeftAllNulls {
-		c.left.numTuples = 0
+	c.setupLeftBuilder()
+}
+
+// willEmit returns the number of tuples the cross joiner will emit based on the
+// current left batch. If the current left batch is exhausted, then a new left
+// batch is fetched. If 0 is returned, then the cross joiner has fully emitted
+// the output.
+func (c *crossJoiner) willEmit() int {
+	if c.needLeftTuples {
+		if c.isLeftAllNulls {
+			if c.isRightAllNulls {
+				// This can happen only in FULL OUTER join when both inputs are
+				// empty.
+				return 0
+			}
+			// All tuples from the right are unmatched and will be emitted once.
+			c.builderState.setup.rightNumRepeats = 1
+			return c.numRightTuples - c.builderState.numEmittedCurLeftBatch
+		}
+		if c.builderState.left.currentBatch == nil || c.canEmit() == 0 {
+			// Get the next left batch if we haven't fetched one yet or we
+			// have fully built the output using the current left batch.
+			if c.readNextLeftBatch() == 0 {
+				return 0
+			}
+		}
+		return c.canEmit()
 	}
-	if c.isRightAllNulls {
-		c.right.numTuples = 0
+	switch c.joinType {
+	case descpb.LeftSemiJoin, descpb.LeftAntiJoin:
+		// We don't need the left tuples, and in case of LEFT SEMI/ANTI this
+		// means that the right input was empty/non-empty, so the cross join
+		// is empty.
+		return 0
+	case descpb.RightSemiJoin, descpb.RightAntiJoin:
+		if c.numRightTuples == 0 {
+			// For RIGHT SEMI, we didn't fetch any right tuples if the left
+			// input was empty; for RIGHT ANTI - if the left input wasn't
+			// empty. In both such cases the cross join is empty.
+			return 0
+		}
+		return c.canEmit()
+	default:
+		colexecerror.InternalError(errors.AssertionFailedf(
+			"unexpectedly don't need left tuples for %s", c.joinType,
+		))
+		// This code is unreachable, but the compiler cannot infer that.
+		return 0
 	}
 }
 
@@ -245,15 +296,12 @@ func (c *crossJoiner) Reset(ctx context.Context) {
 		r.Reset(ctx)
 	}
 	c.crossJoinerBase.Reset(ctx)
-	c.inputsConsumed = false
-	c.numTotalOutputTuples = 0
-	c.numAlreadyEmitted = 0
+	c.rightInputConsumed = false
 	c.isLeftAllNulls = false
 	c.isRightAllNulls = false
+	c.done = false
 }
 
-// TODO(yuzefovich): use two separate unlimited allocators giving the right side
-// larger limit (since it might need to be read multiple times).
 func newCrossJoinerBase(
 	unlimitedAllocator *colmem.Allocator,
 	joinType descpb.JoinType,
@@ -269,32 +317,22 @@ func newCrossJoinerBase(
 			unlimitedAllocator:    unlimitedAllocator,
 			types:                 leftTypes,
 			canonicalTypeFamilies: typeconv.ToCanonicalTypeFamilies(leftTypes),
-			tuples: colexecutils.NewSpillingQueue(
-				&colexecutils.NewSpillingQueueArgs{
-					UnlimitedAllocator: unlimitedAllocator,
-					Types:              leftTypes,
-					MemoryLimit:        memoryLimit,
-					DiskQueueCfg:       cfg,
-					FDSemaphore:        fdSemaphore,
-					DiskAcc:            diskAcc,
-				},
-			),
 		},
 		right: cjState{
 			unlimitedAllocator:    unlimitedAllocator,
 			types:                 rightTypes,
 			canonicalTypeFamilies: typeconv.ToCanonicalTypeFamilies(rightTypes),
-			tuples: colexecutils.NewRewindableSpillingQueue(
-				&colexecutils.NewSpillingQueueArgs{
-					UnlimitedAllocator: unlimitedAllocator,
-					Types:              rightTypes,
-					MemoryLimit:        memoryLimit,
-					DiskQueueCfg:       cfg,
-					FDSemaphore:        fdSemaphore,
-					DiskAcc:            diskAcc,
-				},
-			),
 		},
+		rightTuples: colexecutils.NewRewindableSpillingQueue(
+			&colexecutils.NewSpillingQueueArgs{
+				UnlimitedAllocator: unlimitedAllocator,
+				Types:              rightTypes,
+				MemoryLimit:        memoryLimit,
+				DiskQueueCfg:       cfg,
+				FDSemaphore:        fdSemaphore,
+				DiskAcc:            diskAcc,
+			},
+		),
 	}
 	if joinType.ShouldIncludeLeftColsInOutput() {
 		base.builderState.rightColOffset = len(leftTypes)
@@ -303,12 +341,26 @@ func newCrossJoinerBase(
 }
 
 type crossJoinerBase struct {
-	initHelper   colexecop.InitHelper
-	joinType     descpb.JoinType
-	left, right  cjState
-	builderState struct {
+	initHelper     colexecop.InitHelper
+	joinType       descpb.JoinType
+	left, right    cjState
+	numRightTuples int
+	rightTuples    *colexecutils.SpillingQueue
+	needLeftTuples bool
+	builderState   struct {
 		setup       cjBuilderSetupState
 		left, right cjMutableBuilderState
+
+		// numEmittedCurLeftBatch tracks the number of joined rows returned
+		// based on the current left batch. It is reset on every call to
+		// prepareForNextLeftBatch.
+		numEmittedCurLeftBatch int
+
+		// numEmittedTotal tracks the number of rows that have been emitted
+		// since the crossJoinerBase has been reset. It is only used in RIGHT
+		// SEMI, RIGHT ANTI, and INTERSECT ALL joins.
+		numEmittedTotal int
+
 		// rightColOffset indicates the number of vectors in the output batch
 		// that should be "skipped" when building from the right input.
 		rightColOffset int
@@ -320,142 +372,108 @@ func (b *crossJoinerBase) init(ctx context.Context) {
 	b.initHelper.Init(ctx)
 }
 
-func (b *crossJoinerBase) setupBuilder() {
-	switch b.joinType {
-	case descpb.IntersectAllJoin:
-		// For INTERSECT ALL joins we build min(left.numTuples, right.numTuples)
-		// tuples.
-		if b.left.numTuples < b.right.numTuples {
-			b.builderState.setup.leftSrcEndIdx = b.left.numTuples
-		} else {
-			b.builderState.setup.leftSrcEndIdx = b.right.numTuples
-		}
-	case descpb.ExceptAllJoin:
-		// For EXCEPT ALL joins we build left.numTuples-right.numTuples tuples
-		// (if positive).
-		if b.left.numTuples > b.right.numTuples {
-			b.builderState.setup.leftSrcEndIdx = b.left.numTuples - b.right.numTuples
-		}
-	default:
-		b.builderState.setup.leftSrcEndIdx = b.left.numTuples
-	}
+func (b *crossJoinerBase) setupLeftBuilder() {
 	switch b.joinType {
 	case descpb.LeftSemiJoin, descpb.IntersectAllJoin, descpb.ExceptAllJoin:
 		b.builderState.setup.leftNumRepeats = 1
 	case descpb.LeftAntiJoin:
 		// LEFT ANTI cross join emits all left tuples repeated once only if the
 		// right input is empty.
-		if b.right.numTuples == 0 {
+		if b.numRightTuples == 0 {
 			b.builderState.setup.leftNumRepeats = 1
 		}
 	default:
-		b.builderState.setup.leftNumRepeats = b.right.numTuples
+		b.builderState.setup.leftNumRepeats = b.numRightTuples
 	}
+}
+
+// prepareForNextLeftBatch sets up the crossJoinerBase to build based on a new
+// batch coming from the left input. Only rows with ordinals in
+// [startIdx, endIdx) range will be used for the cross join.
+func (b *crossJoinerBase) prepareForNextLeftBatch(batch coldata.Batch, startIdx, endIdx int) {
+	b.builderState.numEmittedCurLeftBatch = 0
+	b.builderState.left.currentBatch = batch
+	b.builderState.left.curSrcStartIdx = startIdx
+	b.builderState.left.numRepeatsIdx = 0
+	b.builderState.right.numRepeatsIdx = 0
+
+	if b.joinType == descpb.IntersectAllJoin {
+		// Intersect all is special because we need to count how many tuples
+		// from the right we have already used up.
+		if b.builderState.numEmittedTotal+endIdx-startIdx >= b.numRightTuples {
+			// The current left batch is the last one that contains tuples with
+			// a "match".
+			b.builderState.setup.leftSrcEndIdx = b.numRightTuples - b.builderState.numEmittedTotal + startIdx
+		} else {
+			// The current left batch is still emitted fully.
+			b.builderState.setup.leftSrcEndIdx = endIdx
+		}
+	} else {
+		b.builderState.setup.leftSrcEndIdx = endIdx
+	}
+
 	switch b.joinType {
-	case descpb.RightSemiJoin:
+	case descpb.InnerJoin, descpb.LeftOuterJoin, descpb.RightOuterJoin, descpb.FullOuterJoin:
+		b.builderState.setup.rightNumRepeats = endIdx - startIdx
+	case descpb.RightSemiJoin, descpb.RightAntiJoin:
 		b.builderState.setup.rightNumRepeats = 1
-	case descpb.RightAntiJoin:
-		// RIGHT ANTI cross join emits all right tuples repeated once only if
-		// the left input is empty.
-		if b.left.numTuples == 0 {
-			b.builderState.setup.rightNumRepeats = 1
-		}
-	default:
-		b.builderState.setup.rightNumRepeats = b.left.numTuples
 	}
 }
 
-// calculateOutputCount returns the total number of tuples that are emitted by
-// the cross join given already initialized left and right side states.
-func (b *crossJoinerBase) calculateOutputCount() int {
+// canEmit returns the number of output rows that can still be emitted based on
+// the current left batch. It supports only the case when both left and right
+// inputs are not empty.
+func (b *crossJoinerBase) canEmit() int {
 	switch b.joinType {
-	case descpb.InnerJoin:
-		return b.left.numTuples * b.right.numTuples
-	case descpb.LeftOuterJoin:
-		if b.right.numTuples == 0 {
-			return b.left.numTuples
-		}
-		return b.left.numTuples * b.right.numTuples
-	case descpb.RightOuterJoin:
-		if b.left.numTuples == 0 {
-			return b.right.numTuples
-		}
-		return b.left.numTuples * b.right.numTuples
-	case descpb.FullOuterJoin:
-		if b.left.numTuples == 0 || b.right.numTuples == 0 {
-			return b.left.numTuples + b.right.numTuples
-		}
-		return b.left.numTuples * b.right.numTuples
-	case descpb.LeftSemiJoin:
-		if b.right.numTuples == 0 {
+	case descpb.LeftSemiJoin, descpb.IntersectAllJoin, descpb.ExceptAllJoin:
+		return b.builderState.setup.leftSrcEndIdx - b.builderState.left.curSrcStartIdx
+	case descpb.LeftAntiJoin:
+		if b.numRightTuples != 0 {
 			return 0
 		}
-		return b.left.numTuples
+		return b.builderState.setup.leftSrcEndIdx - b.builderState.left.curSrcStartIdx
 	case descpb.RightSemiJoin:
-		if b.left.numTuples == 0 {
-			return 0
-		}
-		return b.right.numTuples
-	case descpb.LeftAntiJoin:
-		if b.right.numTuples != 0 {
+		// RIGHT SEMI cross join emits all right tuples repeated once iff the
+		// left input is not empty.
+		if b.builderState.setup.leftSrcEndIdx == b.builderState.left.curSrcStartIdx {
 			return 0
 		}
-		return b.left.numTuples
+		return b.numRightTuples - b.builderState.numEmittedTotal
 	case descpb.RightAntiJoin:
-		if b.left.numTuples != 0 {
-			return 0
-		}
-		return b.right.numTuples
-	case descpb.IntersectAllJoin:
-		if b.right.numTuples < b.left.numTuples {
-			return b.right.numTuples
-		}
-		return b.left.numTuples
-	case descpb.ExceptAllJoin:
-		if b.right.numTuples > b.left.numTuples {
+		// RIGHT ANTI cross join emits all right tuples repeated once iff the
+		// left input is empty.
+		if b.builderState.setup.leftSrcEndIdx != b.builderState.left.curSrcStartIdx {
 			return 0
 		}
-		return b.left.numTuples - b.right.numTuples
+		return b.numRightTuples - b.builderState.numEmittedTotal
 	default:
-		colexecerror.InternalError(errors.AssertionFailedf("unexpected join type %s", b.joinType.String()))
-		// Unreachable code.
-		return 0
+		return b.builderState.setup.rightNumRepeats*b.numRightTuples - b.builderState.numEmittedCurLeftBatch
 	}
 }
 
 func (b *crossJoinerBase) Reset(ctx context.Context) {
-	if b.left.tuples != nil {
-		b.left.tuples.Reset(ctx)
-	}
-	if b.right.tuples != nil {
-		b.right.tuples.Reset(ctx)
+	if b.rightTuples != nil {
+		b.rightTuples.Reset(ctx)
 	}
-	b.left.numTuples = 0
-	b.right.numTuples = 0
+	b.numRightTuples = 0
 	b.builderState.left.reset()
 	b.builderState.right.reset()
+	b.builderState.numEmittedCurLeftBatch = 0
+	b.builderState.numEmittedTotal = 0
 }
 
 func (b *crossJoinerBase) Close() error {
 	ctx := b.initHelper.EnsureCtx()
-	var lastErr error
-	if b.left.tuples != nil {
-		lastErr = b.left.tuples.Close(ctx)
-	}
-	if b.right.tuples != nil {
-		if err := b.right.tuples.Close(ctx); err != nil {
-			lastErr = err
-		}
+	if b.rightTuples != nil {
+		return b.rightTuples.Close(ctx)
 	}
-	return lastErr
+	return nil
 }
 
 type cjState struct {
 	unlimitedAllocator    *colmem.Allocator
 	types                 []*types.T
 	canonicalTypeFamilies []types.Family
-	tuples                *colexecutils.SpillingQueue
-	numTuples             int
 }
 
 type cjBuilderSetupState struct {
@@ -481,16 +499,10 @@ type cjMutableBuilderState struct {
 	// numRepeatsIdx tracks the number of times a "group" has already been
 	// repeated.
 	numRepeatsIdx int
-	// setOpLeftSrcIdx tracks the current tuple's index from the left input for
-	// set operation joins. INTERSECT ALL and EXCEPT ALL joins are special
-	// because they need to build the output partially (namely, for exactly
-	// leftSrcEndIdx number of tuples which could span multiple batches).
-	setOpLeftSrcIdx int
 }
 
 func (s *cjMutableBuilderState) reset() {
 	s.currentBatch = nil
 	s.curSrcStartIdx = 0
 	s.numRepeatsIdx = 0
-	s.setOpLeftSrcIdx = 0
 }
diff --git a/pkg/sql/colexec/colexecjoin/crossjoiner_tmpl.go b/pkg/sql/colexec/colexecjoin/crossjoiner_tmpl.go
index 4f097649719c..e01a6da7316e 100644
--- a/pkg/sql/colexec/colexecjoin/crossjoiner_tmpl.go
+++ b/pkg/sql/colexec/colexecjoin/crossjoiner_tmpl.go
@@ -38,198 +38,125 @@ var (
 	_ = types.BoolFamily
 )
 
+// buildFromLeftBatch is the body of buildFromLeftInput that templates out the
+// fact whether the current left batch has a selection vector or not.
+// execgen:inline
+// execgen:template<hasSel>
+func buildFromLeftBatch(b *crossJoinerBase, currentBatch coldata.Batch, sel []int, hasSel bool) {
+	if !hasSel {
+		// Remove the unused warning.
+		_ = sel
+	}
+	// We'll be modifying the builder state as we go, but we'll need to be able
+	// to restore the state to initial for each column.
+	initialBuilderState := b.builderState.left
+	bs := &b.builderState.left
+	leftNumRepeats := b.builderState.setup.leftNumRepeats
+	leftSrcEndIdx := b.builderState.setup.leftSrcEndIdx
+	outputCapacity := b.output.Capacity()
+	var srcStartIdx int
+	// Loop over every column.
+	for colIdx := range b.left.types {
+		if colIdx > 0 {
+			// Restore the builder state so that this new column started off
+			// fresh.
+			*bs = initialBuilderState
+		}
+		outStartIdx := destStartIdx
+		src := currentBatch.ColVec(colIdx)
+		srcNulls := src.Nulls()
+		out := b.output.ColVec(colIdx)
+		outNulls := out.Nulls()
+		switch b.left.canonicalTypeFamilies[colIdx] {
+		// {{range .}}
+		case _CANONICAL_TYPE_FAMILY:
+			switch b.left.types[colIdx].Width() {
+			// {{range .WidthOverloads}}
+			case _TYPE_WIDTH:
+				srcCol := src.TemplateType()
+				outCol := out.TemplateType()
+				if leftNumRepeats == 1 {
+					// Loop over every tuple in the current batch.
+					for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+						// Repeat each tuple one time.
+						if hasSel {
+							srcStartIdx = sel[bs.curSrcStartIdx]
+						} else {
+							srcStartIdx = bs.curSrcStartIdx
+						}
+						if srcNulls.NullAt(srcStartIdx) {
+							outNulls.SetNull(outStartIdx)
+						} else {
+							val := srcCol.Get(srcStartIdx)
+							outCol.Set(outStartIdx, val)
+						}
+						outStartIdx++
+						bs.curSrcStartIdx++
+					}
+				} else {
+					// Loop over every tuple in the current batch.
+					for bs.curSrcStartIdx < leftSrcEndIdx && outStartIdx < outputCapacity {
+						// Repeat each row leftNumRepeats times.
+						if hasSel {
+							srcStartIdx = sel[bs.curSrcStartIdx]
+						} else {
+							srcStartIdx = bs.curSrcStartIdx
+						}
+						toAppend := leftNumRepeats - bs.numRepeatsIdx
+						if outStartIdx+toAppend > outputCapacity {
+							// We don't have enough space to repeat the current
+							// value the required number of times, so we'll have
+							// to continue from here on the next call.
+							toAppend = outputCapacity - outStartIdx
+							bs.numRepeatsIdx += toAppend
+						} else {
+							// We fully processed the current tuple for the
+							// current column, and before moving on to the next
+							// one, we need to reset numRepeatsIdx (so that the
+							// next tuple would be repeated leftNumRepeats
+							// times).
+							bs.curSrcStartIdx++
+							bs.numRepeatsIdx = 0
+						}
+						if srcNulls.NullAt(srcStartIdx) {
+							outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
+							outStartIdx += toAppend
+						} else {
+							val := srcCol.Get(srcStartIdx)
+							for i := 0; i < toAppend; i++ {
+								outCol.Set(outStartIdx, val)
+								outStartIdx++
+							}
+						}
+					}
+				}
+				// {{end}}
+			}
+		// {{end}}
+		default:
+			colexecerror.InternalError(errors.AssertionFailedf("unhandled type %s", b.left.types[colIdx].String()))
+		}
+	}
+}
+
 // buildFromLeftInput builds part of the output of a cross join that comes from
 // the vectors of the left input. The new output tuples are put starting at
 // index destStartIdx and will not exceed the capacity of the output batch. It
-// is assumed that setupBuilder has been called.
+// is assumed that setupLeftBuilder and prepareForNextLeftBatch have been
+// called.
 //
 // The goal of this method is to repeat each tuple from the left input
-// leftNumRepeats times. For set-operation joins only first setOpLeftSrcIdx
-// tuples are built from.
+// leftNumRepeats times. Only the tuples in [curSrcStartIdx, leftSrcEndIdx) are
+// used from the current left batch.
 func (b *crossJoinerBase) buildFromLeftInput(ctx context.Context, destStartIdx int) {
-	var err error
 	currentBatch := b.builderState.left.currentBatch
-	if currentBatch == nil || b.builderState.left.curSrcStartIdx == currentBatch.Length() {
-		// We need to get the next batch to build from if it is the first one or
-		// we have fully processed the previous one.
-		currentBatch, err = b.left.tuples.Dequeue(ctx)
-		if err != nil {
-			colexecerror.InternalError(err)
-		}
-		b.builderState.left.currentBatch = currentBatch
-		b.builderState.left.curSrcStartIdx = 0
-		b.builderState.left.numRepeatsIdx = 0
-	}
-	initialBuilderState := b.builderState.left
 	b.left.unlimitedAllocator.PerformOperation(
 		b.output.ColVecs()[:len(b.left.types)],
 		func() {
-			leftNumRepeats := b.builderState.setup.leftNumRepeats
-			isSetOp := b.joinType.IsSetOpJoin()
-			outputCapacity := b.output.Capacity()
-			batchLength := currentBatch.Length()
-			for batchLength > 0 {
-				// Loop over every column.
-			LeftColLoop:
-				for colIdx := range b.left.types {
-					outStartIdx := destStartIdx
-					src := currentBatch.ColVec(colIdx)
-					srcNulls := src.Nulls()
-					out := b.output.ColVec(colIdx)
-					outNulls := out.Nulls()
-					switch b.left.canonicalTypeFamilies[colIdx] {
-					// {{range .}}
-					case _CANONICAL_TYPE_FAMILY:
-						switch b.left.types[colIdx].Width() {
-						// {{range .WidthOverloads}}
-						case _TYPE_WIDTH:
-							srcCol := src.TemplateType()
-							outCol := out.TemplateType()
-							if leftNumRepeats == 1 {
-								// Loop over every tuple in the current batch.
-								for b.builderState.left.curSrcStartIdx < batchLength {
-									// Repeat each tuple one time.
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
-											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
-										}
-										b.builderState.left.setOpLeftSrcIdx++
-									}
-
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNull(outStartIdx)
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										outCol.Set(outStartIdx, val)
-									}
-									outStartIdx++
-									b.builderState.left.curSrcStartIdx++
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											return
-										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
-									}
-								}
-							} else {
-								// Loop over every tuple in the current batch.
-								for ; b.builderState.left.curSrcStartIdx < batchLength; b.builderState.left.curSrcStartIdx++ {
-									// Repeat each row leftNumRepeats times.
-									srcStartIdx := b.builderState.left.curSrcStartIdx
-									toAppend := leftNumRepeats - b.builderState.left.numRepeatsIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
-									}
-
-									if isSetOp {
-										if b.builderState.left.setOpLeftSrcIdx == b.builderState.setup.leftSrcEndIdx {
-											// We have fully materialized first leftSrcEndIdx
-											// tuples in the current column, so we need to
-											// either transition to the next column or exit.
-											if colIdx == len(b.left.types)-1 {
-												// This is the last column.
-												return
-											}
-											// We need to start building the next column
-											// with the same initial builder state as the
-											// current column.
-											b.builderState.left = initialBuilderState
-											continue LeftColLoop
-										}
-										b.builderState.left.setOpLeftSrcIdx += toAppend
-									}
-
-									if srcNulls.NullAt(srcStartIdx) {
-										outNulls.SetNullRange(outStartIdx, outStartIdx+toAppend)
-										outStartIdx += toAppend
-									} else {
-										val := srcCol.Get(srcStartIdx)
-										for i := 0; i < toAppend; i++ {
-											outCol.Set(outStartIdx, val)
-											outStartIdx++
-										}
-									}
-
-									if outStartIdx == outputCapacity {
-										// We reached the capacity of the output vector,
-										// so we move to the next column.
-										if colIdx == len(b.left.types)-1 {
-											// This is the last column.
-											b.builderState.left.numRepeatsIdx += toAppend
-											if b.builderState.left.numRepeatsIdx == leftNumRepeats {
-												// The current tuple has already been repeated
-												// the desired number of times, so we advance
-												// the source index.
-												b.builderState.left.curSrcStartIdx++
-												b.builderState.left.numRepeatsIdx = 0
-											}
-											return
-										}
-										// We need to start building the next column
-										// with the same initial builder state as the
-										// current column.
-										b.builderState.left = initialBuilderState
-										continue LeftColLoop
-									}
-									// We fully processed the current tuple for the current
-									// column, and before moving on to the next one, we need
-									// to reset numRepeatsIdx (so that the next tuple would
-									// be repeated leftNumRepeats times).
-									b.builderState.left.numRepeatsIdx = 0
-								}
-							}
-							// {{end}}
-						}
-					// {{end}}
-					default:
-						colexecerror.InternalError(errors.AssertionFailedf("unhandled type %s", b.left.types[colIdx].String()))
-					}
-					if colIdx == len(b.left.types)-1 {
-						// We have appended some tuples into the output batch from the current
-						// batch (the latter is now fully processed), so we need to adjust
-						// destStartIdx accordingly for the next batch.
-						destStartIdx = outStartIdx
-					} else {
-						b.builderState.left = initialBuilderState
-					}
-				}
-				// We have processed all tuples in the current batch from the
-				// buffered group, so we need to Dequeue the next one.
-				currentBatch, err = b.left.tuples.Dequeue(ctx)
-				if err != nil {
-					colexecerror.InternalError(err)
-				}
-				b.builderState.left.currentBatch = currentBatch
-				batchLength = currentBatch.Length()
-				// We have transitioned to building from a new batch, so we
-				// need to update the builder state to build from the beginning
-				// of the new batch.
-				b.builderState.left.curSrcStartIdx = 0
-				b.builderState.left.numRepeatsIdx = 0
-				// We also need to update 'initialBuilderState' so that the
-				// builder state gets reset correctly in-between different
-				// columns in the loop above.
-				initialBuilderState = b.builderState.left
+			if sel := currentBatch.Selection(); sel != nil {
+				buildFromLeftBatch(b, currentBatch, sel, true /* hasSel */)
+			} else {
+				buildFromLeftBatch(b, currentBatch, nil, false /* hasSel */)
 			}
 		},
 	)
@@ -238,31 +165,35 @@ func (b *crossJoinerBase) buildFromLeftInput(ctx context.Context, destStartIdx i
 // buildFromRightInput builds part of the output of a cross join that comes from
 // the vectors of the right input. The new output tuples are put starting at
 // index destStartIdx and will not exceed the capacity of the output batch. It
-// is assumed that setupBuilder has been called.
+// is assumed that the right input has been fully consumed and is stored in
+// b.rightTuples spilling queue.
 //
 // The goal of this method is to repeat all tuples from the right input
 // rightNumRepeats times (i.e. repeating the whole list of tuples at once).
 func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx int) {
 	var err error
+	bs := &b.builderState.right
+	rightNumRepeats := b.builderState.setup.rightNumRepeats
 	b.right.unlimitedAllocator.PerformOperation(
 		b.output.ColVecs()[b.builderState.rightColOffset:],
 		func() {
 			outStartIdx := destStartIdx
 			outputCapacity := b.output.Capacity()
-			// Repeat the buffered tuples rightNumRepeats times.
-			for ; b.builderState.right.numRepeatsIdx < b.builderState.setup.rightNumRepeats; b.builderState.right.numRepeatsIdx++ {
-				currentBatch := b.builderState.right.currentBatch
+			// Repeat the buffered tuples rightNumRepeats times until we fill
+			// the output capacity.
+			for ; outStartIdx < outputCapacity && bs.numRepeatsIdx < rightNumRepeats; bs.numRepeatsIdx++ {
+				currentBatch := bs.currentBatch
 				if currentBatch == nil {
-					currentBatch, err = b.right.tuples.Dequeue(ctx)
+					currentBatch, err = b.rightTuples.Dequeue(ctx)
 					if err != nil {
 						colexecerror.InternalError(err)
 					}
-					b.builderState.right.currentBatch = currentBatch
-					b.builderState.right.curSrcStartIdx = 0
+					bs.currentBatch = currentBatch
+					bs.curSrcStartIdx = 0
 				}
 				batchLength := currentBatch.Length()
 				for batchLength > 0 {
-					toAppend := batchLength - b.builderState.right.curSrcStartIdx
+					toAppend := batchLength - bs.curSrcStartIdx
 					if outStartIdx+toAppend > outputCapacity {
 						toAppend = outputCapacity - outStartIdx
 					}
@@ -285,10 +216,10 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 								// Optimization in the case that group length is 1, use assign
 								// instead of copy.
 								if toAppend == 1 {
-									if srcNulls.NullAt(b.builderState.right.curSrcStartIdx) {
+									if srcNulls.NullAt(bs.curSrcStartIdx) {
 										outNulls.SetNull(outStartIdx)
 									} else {
-										v := srcCol.Get(b.builderState.right.curSrcStartIdx)
+										v := srcCol.Get(bs.curSrcStartIdx)
 										outCol.Set(outStartIdx, v)
 									}
 								} else {
@@ -296,8 +227,8 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 										coldata.SliceArgs{
 											Src:         src,
 											DestIdx:     outStartIdx,
-											SrcStartIdx: b.builderState.right.curSrcStartIdx,
-											SrcEndIdx:   b.builderState.right.curSrcStartIdx + toAppend,
+											SrcStartIdx: bs.curSrcStartIdx,
+											SrcEndIdx:   bs.curSrcStartIdx + toAppend,
 										},
 									)
 								}
@@ -310,34 +241,28 @@ func (b *crossJoinerBase) buildFromRightInput(ctx context.Context, destStartIdx
 					}
 					outStartIdx += toAppend
 
-					if toAppend < batchLength-b.builderState.right.curSrcStartIdx {
+					if toAppend < batchLength-bs.curSrcStartIdx {
 						// If we haven't materialized all the tuples from the
 						// batch, then we are ready to emit the output batch.
-						b.builderState.right.curSrcStartIdx += toAppend
+						bs.curSrcStartIdx += toAppend
 						return
 					}
 					// We have fully processed the current batch, so we need to
 					// get the next one.
-					currentBatch, err = b.right.tuples.Dequeue(ctx)
+					currentBatch, err = b.rightTuples.Dequeue(ctx)
 					if err != nil {
 						colexecerror.InternalError(err)
 					}
-					b.builderState.right.currentBatch = currentBatch
+					bs.currentBatch = currentBatch
 					batchLength = currentBatch.Length()
-					b.builderState.right.curSrcStartIdx = 0
-
-					if outStartIdx == outputCapacity {
-						// We reached the capacity of the output batch, so we
-						// can emit it.
-						return
-					}
+					bs.curSrcStartIdx = 0
 				}
 				// We have fully processed all the batches from the right side,
 				// so we need to Rewind the queue.
-				if err := b.right.tuples.Rewind(); err != nil {
+				if err := b.rightTuples.Rewind(); err != nil {
 					colexecerror.InternalError(err)
 				}
-				b.builderState.right.currentBatch = nil
+				bs.currentBatch = nil
 			}
 		})
 }
diff --git a/pkg/sql/colexec/colexecjoin/mergejoinbase.eg.go b/pkg/sql/colexec/colexecjoin/mergejoinbase.eg.go
index ce0ffcb2286b..765aa1537a76 100644
--- a/pkg/sql/colexec/colexecjoin/mergejoinbase.eg.go
+++ b/pkg/sql/colexec/colexecjoin/mergejoinbase.eg.go
@@ -31,17 +31,13 @@ var (
 )
 
 // isBufferedGroupFinished checks to see whether or not the buffered group
-// corresponding to input continues in batch.
+// corresponding to the first tuple continues in batch.
 func (o *mergeJoinBase) isBufferedGroupFinished(
-	input *mergeJoinInput, batch coldata.Batch, rowIdx int,
+	input *mergeJoinInput, firstTuple []coldata.Vec, batch coldata.Batch, rowIdx int,
 ) bool {
 	if batch.Length() == 0 {
 		return true
 	}
-	bufferedGroup := o.bufferedGroup.left
-	if input == &o.right {
-		bufferedGroup = o.bufferedGroup.right
-	}
 	tupleToLookAtIdx := rowIdx
 	sel := batch.Selection()
 	if sel != nil {
@@ -62,7 +58,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				// right side being an input) this check will always return false since
 				// nulls couldn't be buffered up though.
 				// TODO(yuzefovich): consider templating this.
-				bufferedNull := bufferedGroup.firstTuple[colIdx].MaybeHasNulls() && bufferedGroup.firstTuple[colIdx].Nulls().NullAt(0)
+				bufferedNull := firstTuple[colIdx].MaybeHasNulls() && firstTuple[colIdx].Nulls().NullAt(0)
 				incomingNull := batch.ColVec(int(colIdx)).MaybeHasNulls() && batch.ColVec(int(colIdx)).Nulls().NullAt(tupleToLookAtIdx)
 				if o.joinType.IsSetOpJoin() {
 					if bufferedNull && incomingNull {
@@ -73,7 +69,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				if bufferedNull || incomingNull {
 					return true
 				}
-				bufferedCol := bufferedGroup.firstTuple[colIdx].Bool()
+				bufferedCol := firstTuple[colIdx].Bool()
 				prevVal := bufferedCol.Get(0)
 				col := batch.ColVec(int(colIdx)).Bool()
 				curVal := col.Get(tupleToLookAtIdx)
@@ -107,7 +103,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				// right side being an input) this check will always return false since
 				// nulls couldn't be buffered up though.
 				// TODO(yuzefovich): consider templating this.
-				bufferedNull := bufferedGroup.firstTuple[colIdx].MaybeHasNulls() && bufferedGroup.firstTuple[colIdx].Nulls().NullAt(0)
+				bufferedNull := firstTuple[colIdx].MaybeHasNulls() && firstTuple[colIdx].Nulls().NullAt(0)
 				incomingNull := batch.ColVec(int(colIdx)).MaybeHasNulls() && batch.ColVec(int(colIdx)).Nulls().NullAt(tupleToLookAtIdx)
 				if o.joinType.IsSetOpJoin() {
 					if bufferedNull && incomingNull {
@@ -118,7 +114,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				if bufferedNull || incomingNull {
 					return true
 				}
-				bufferedCol := bufferedGroup.firstTuple[colIdx].Bytes()
+				bufferedCol := firstTuple[colIdx].Bytes()
 				prevVal := bufferedCol.Get(0)
 				col := batch.ColVec(int(colIdx)).Bytes()
 				curVal := col.Get(tupleToLookAtIdx)
@@ -144,7 +140,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				// right side being an input) this check will always return false since
 				// nulls couldn't be buffered up though.
 				// TODO(yuzefovich): consider templating this.
-				bufferedNull := bufferedGroup.firstTuple[colIdx].MaybeHasNulls() && bufferedGroup.firstTuple[colIdx].Nulls().NullAt(0)
+				bufferedNull := firstTuple[colIdx].MaybeHasNulls() && firstTuple[colIdx].Nulls().NullAt(0)
 				incomingNull := batch.ColVec(int(colIdx)).MaybeHasNulls() && batch.ColVec(int(colIdx)).Nulls().NullAt(tupleToLookAtIdx)
 				if o.joinType.IsSetOpJoin() {
 					if bufferedNull && incomingNull {
@@ -155,7 +151,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				if bufferedNull || incomingNull {
 					return true
 				}
-				bufferedCol := bufferedGroup.firstTuple[colIdx].Decimal()
+				bufferedCol := firstTuple[colIdx].Decimal()
 				prevVal := bufferedCol.Get(0)
 				col := batch.ColVec(int(colIdx)).Decimal()
 				curVal := col.Get(tupleToLookAtIdx)
@@ -180,7 +176,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				// right side being an input) this check will always return false since
 				// nulls couldn't be buffered up though.
 				// TODO(yuzefovich): consider templating this.
-				bufferedNull := bufferedGroup.firstTuple[colIdx].MaybeHasNulls() && bufferedGroup.firstTuple[colIdx].Nulls().NullAt(0)
+				bufferedNull := firstTuple[colIdx].MaybeHasNulls() && firstTuple[colIdx].Nulls().NullAt(0)
 				incomingNull := batch.ColVec(int(colIdx)).MaybeHasNulls() && batch.ColVec(int(colIdx)).Nulls().NullAt(tupleToLookAtIdx)
 				if o.joinType.IsSetOpJoin() {
 					if bufferedNull && incomingNull {
@@ -191,7 +187,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				if bufferedNull || incomingNull {
 					return true
 				}
-				bufferedCol := bufferedGroup.firstTuple[colIdx].Int16()
+				bufferedCol := firstTuple[colIdx].Int16()
 				prevVal := bufferedCol.Get(0)
 				col := batch.ColVec(int(colIdx)).Int16()
 				curVal := col.Get(tupleToLookAtIdx)
@@ -224,7 +220,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				// right side being an input) this check will always return false since
 				// nulls couldn't be buffered up though.
 				// TODO(yuzefovich): consider templating this.
-				bufferedNull := bufferedGroup.firstTuple[colIdx].MaybeHasNulls() && bufferedGroup.firstTuple[colIdx].Nulls().NullAt(0)
+				bufferedNull := firstTuple[colIdx].MaybeHasNulls() && firstTuple[colIdx].Nulls().NullAt(0)
 				incomingNull := batch.ColVec(int(colIdx)).MaybeHasNulls() && batch.ColVec(int(colIdx)).Nulls().NullAt(tupleToLookAtIdx)
 				if o.joinType.IsSetOpJoin() {
 					if bufferedNull && incomingNull {
@@ -235,7 +231,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				if bufferedNull || incomingNull {
 					return true
 				}
-				bufferedCol := bufferedGroup.firstTuple[colIdx].Int32()
+				bufferedCol := firstTuple[colIdx].Int32()
 				prevVal := bufferedCol.Get(0)
 				col := batch.ColVec(int(colIdx)).Int32()
 				curVal := col.Get(tupleToLookAtIdx)
@@ -269,7 +265,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				// right side being an input) this check will always return false since
 				// nulls couldn't be buffered up though.
 				// TODO(yuzefovich): consider templating this.
-				bufferedNull := bufferedGroup.firstTuple[colIdx].MaybeHasNulls() && bufferedGroup.firstTuple[colIdx].Nulls().NullAt(0)
+				bufferedNull := firstTuple[colIdx].MaybeHasNulls() && firstTuple[colIdx].Nulls().NullAt(0)
 				incomingNull := batch.ColVec(int(colIdx)).MaybeHasNulls() && batch.ColVec(int(colIdx)).Nulls().NullAt(tupleToLookAtIdx)
 				if o.joinType.IsSetOpJoin() {
 					if bufferedNull && incomingNull {
@@ -280,7 +276,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				if bufferedNull || incomingNull {
 					return true
 				}
-				bufferedCol := bufferedGroup.firstTuple[colIdx].Int64()
+				bufferedCol := firstTuple[colIdx].Int64()
 				prevVal := bufferedCol.Get(0)
 				col := batch.ColVec(int(colIdx)).Int64()
 				curVal := col.Get(tupleToLookAtIdx)
@@ -317,7 +313,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				// right side being an input) this check will always return false since
 				// nulls couldn't be buffered up though.
 				// TODO(yuzefovich): consider templating this.
-				bufferedNull := bufferedGroup.firstTuple[colIdx].MaybeHasNulls() && bufferedGroup.firstTuple[colIdx].Nulls().NullAt(0)
+				bufferedNull := firstTuple[colIdx].MaybeHasNulls() && firstTuple[colIdx].Nulls().NullAt(0)
 				incomingNull := batch.ColVec(int(colIdx)).MaybeHasNulls() && batch.ColVec(int(colIdx)).Nulls().NullAt(tupleToLookAtIdx)
 				if o.joinType.IsSetOpJoin() {
 					if bufferedNull && incomingNull {
@@ -328,7 +324,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				if bufferedNull || incomingNull {
 					return true
 				}
-				bufferedCol := bufferedGroup.firstTuple[colIdx].Float64()
+				bufferedCol := firstTuple[colIdx].Float64()
 				prevVal := bufferedCol.Get(0)
 				col := batch.ColVec(int(colIdx)).Float64()
 				curVal := col.Get(tupleToLookAtIdx)
@@ -373,7 +369,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				// right side being an input) this check will always return false since
 				// nulls couldn't be buffered up though.
 				// TODO(yuzefovich): consider templating this.
-				bufferedNull := bufferedGroup.firstTuple[colIdx].MaybeHasNulls() && bufferedGroup.firstTuple[colIdx].Nulls().NullAt(0)
+				bufferedNull := firstTuple[colIdx].MaybeHasNulls() && firstTuple[colIdx].Nulls().NullAt(0)
 				incomingNull := batch.ColVec(int(colIdx)).MaybeHasNulls() && batch.ColVec(int(colIdx)).Nulls().NullAt(tupleToLookAtIdx)
 				if o.joinType.IsSetOpJoin() {
 					if bufferedNull && incomingNull {
@@ -384,7 +380,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				if bufferedNull || incomingNull {
 					return true
 				}
-				bufferedCol := bufferedGroup.firstTuple[colIdx].Timestamp()
+				bufferedCol := firstTuple[colIdx].Timestamp()
 				prevVal := bufferedCol.Get(0)
 				col := batch.ColVec(int(colIdx)).Timestamp()
 				curVal := col.Get(tupleToLookAtIdx)
@@ -417,7 +413,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				// right side being an input) this check will always return false since
 				// nulls couldn't be buffered up though.
 				// TODO(yuzefovich): consider templating this.
-				bufferedNull := bufferedGroup.firstTuple[colIdx].MaybeHasNulls() && bufferedGroup.firstTuple[colIdx].Nulls().NullAt(0)
+				bufferedNull := firstTuple[colIdx].MaybeHasNulls() && firstTuple[colIdx].Nulls().NullAt(0)
 				incomingNull := batch.ColVec(int(colIdx)).MaybeHasNulls() && batch.ColVec(int(colIdx)).Nulls().NullAt(tupleToLookAtIdx)
 				if o.joinType.IsSetOpJoin() {
 					if bufferedNull && incomingNull {
@@ -428,7 +424,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				if bufferedNull || incomingNull {
 					return true
 				}
-				bufferedCol := bufferedGroup.firstTuple[colIdx].Interval()
+				bufferedCol := firstTuple[colIdx].Interval()
 				prevVal := bufferedCol.Get(0)
 				col := batch.ColVec(int(colIdx)).Interval()
 				curVal := col.Get(tupleToLookAtIdx)
@@ -454,7 +450,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				// right side being an input) this check will always return false since
 				// nulls couldn't be buffered up though.
 				// TODO(yuzefovich): consider templating this.
-				bufferedNull := bufferedGroup.firstTuple[colIdx].MaybeHasNulls() && bufferedGroup.firstTuple[colIdx].Nulls().NullAt(0)
+				bufferedNull := firstTuple[colIdx].MaybeHasNulls() && firstTuple[colIdx].Nulls().NullAt(0)
 				incomingNull := batch.ColVec(int(colIdx)).MaybeHasNulls() && batch.ColVec(int(colIdx)).Nulls().NullAt(tupleToLookAtIdx)
 				if o.joinType.IsSetOpJoin() {
 					if bufferedNull && incomingNull {
@@ -465,7 +461,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				if bufferedNull || incomingNull {
 					return true
 				}
-				bufferedCol := bufferedGroup.firstTuple[colIdx].JSON()
+				bufferedCol := firstTuple[colIdx].JSON()
 				prevVal := bufferedCol.Get(0)
 				col := batch.ColVec(int(colIdx)).JSON()
 				curVal := col.Get(tupleToLookAtIdx)
@@ -497,7 +493,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				// right side being an input) this check will always return false since
 				// nulls couldn't be buffered up though.
 				// TODO(yuzefovich): consider templating this.
-				bufferedNull := bufferedGroup.firstTuple[colIdx].MaybeHasNulls() && bufferedGroup.firstTuple[colIdx].Nulls().NullAt(0)
+				bufferedNull := firstTuple[colIdx].MaybeHasNulls() && firstTuple[colIdx].Nulls().NullAt(0)
 				incomingNull := batch.ColVec(int(colIdx)).MaybeHasNulls() && batch.ColVec(int(colIdx)).Nulls().NullAt(tupleToLookAtIdx)
 				if o.joinType.IsSetOpJoin() {
 					if bufferedNull && incomingNull {
@@ -508,7 +504,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				if bufferedNull || incomingNull {
 					return true
 				}
-				bufferedCol := bufferedGroup.firstTuple[colIdx].Datum()
+				bufferedCol := firstTuple[colIdx].Datum()
 				prevVal := bufferedCol.Get(0)
 				col := batch.ColVec(int(colIdx)).Datum()
 				curVal := col.Get(tupleToLookAtIdx)
diff --git a/pkg/sql/colexec/colexecjoin/mergejoinbase_tmpl.go b/pkg/sql/colexec/colexecjoin/mergejoinbase_tmpl.go
index d7168305d113..8b4b1641473e 100644
--- a/pkg/sql/colexec/colexecjoin/mergejoinbase_tmpl.go
+++ b/pkg/sql/colexec/colexecjoin/mergejoinbase_tmpl.go
@@ -57,17 +57,13 @@ func _ASSIGN_EQ(_, _, _, _, _, _ interface{}) int {
 // */}}
 
 // isBufferedGroupFinished checks to see whether or not the buffered group
-// corresponding to input continues in batch.
+// corresponding to the first tuple continues in batch.
 func (o *mergeJoinBase) isBufferedGroupFinished(
-	input *mergeJoinInput, batch coldata.Batch, rowIdx int,
+	input *mergeJoinInput, firstTuple []coldata.Vec, batch coldata.Batch, rowIdx int,
 ) bool {
 	if batch.Length() == 0 {
 		return true
 	}
-	bufferedGroup := o.bufferedGroup.left
-	if input == &o.right {
-		bufferedGroup = o.bufferedGroup.right
-	}
 	tupleToLookAtIdx := rowIdx
 	sel := batch.Selection()
 	if sel != nil {
@@ -89,7 +85,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				// right side being an input) this check will always return false since
 				// nulls couldn't be buffered up though.
 				// TODO(yuzefovich): consider templating this.
-				bufferedNull := bufferedGroup.firstTuple[colIdx].MaybeHasNulls() && bufferedGroup.firstTuple[colIdx].Nulls().NullAt(0)
+				bufferedNull := firstTuple[colIdx].MaybeHasNulls() && firstTuple[colIdx].Nulls().NullAt(0)
 				incomingNull := batch.ColVec(int(colIdx)).MaybeHasNulls() && batch.ColVec(int(colIdx)).Nulls().NullAt(tupleToLookAtIdx)
 				if o.joinType.IsSetOpJoin() {
 					if bufferedNull && incomingNull {
@@ -100,7 +96,7 @@ func (o *mergeJoinBase) isBufferedGroupFinished(
 				if bufferedNull || incomingNull {
 					return true
 				}
-				bufferedCol := bufferedGroup.firstTuple[colIdx].TemplateType()
+				bufferedCol := firstTuple[colIdx].TemplateType()
 				prevVal := bufferedCol.Get(0)
 				col := batch.ColVec(int(colIdx)).TemplateType()
 				curVal := col.Get(tupleToLookAtIdx)
diff --git a/pkg/sql/colexec/colexecjoin/mergejoiner.go b/pkg/sql/colexec/colexecjoin/mergejoiner.go
index 45ddc5ad89bb..07f8af436e49 100644
--- a/pkg/sql/colexec/colexecjoin/mergejoiner.go
+++ b/pkg/sql/colexec/colexecjoin/mergejoiner.go
@@ -12,7 +12,6 @@ package colexecjoin
 
 import (
 	"context"
-	"math"
 	"unsafe"
 
 	"github.com/cockroachdb/cockroach/pkg/col/coldata"
@@ -32,6 +31,161 @@ import (
 	"github.com/marusama/semaphore"
 )
 
+// The merge join operator uses a probe and build approach to generate the join.
+// What this means is that instead of going through and expanding the cross
+// product row by row, the operator performs two passes. The first pass
+// generates a list of groups of matching rows based on the equality columns
+// (where a "group" represents a contiguous set of rows that match on the
+// equality columns). The second pass is where the groups and their associated
+// cross products are materialized into the full output.
+//
+// A group describes the indexes of rows within the probing batches that are
+// equal on the equality columns. For example, if we have a group with indexes
+// [0, 3) on the left and [1, 3) on the right, it means that first three rows
+// from the left batch match with the 2nd and 3rd row from the right batch. In
+// order to produce the output we need to repeat each left row the number of
+// times equal to the size of the group on the right side (2 in the example);
+// and we need to repeat all rows from the right side of the group the number of
+// times equal to the size of the group on the left side (3 in the example).
+//
+// There is a complication, however, when a group might extend in the next
+// batch, and we call such a group a "buffered group". For example, imagine we
+// have a batch with rows {0, 0, 1, 2}, then the group containing the row with
+// value 2 is not complete with the current batch, and it might (or might not)
+// extend into the next batch.
+//
+// We handle these buffered groups definitely depending on the side of the join:
+// - the left buffered group is processed in a streaming fashion, one batch from
+//   the left input at a time
+// - the right buffered group needs to be fully buffered before we can build the
+//   output.
+//
+// Let's walk through a concrete example to show the general flow of state
+// transitions as well as to better show the terminology we use throughout the
+// merge joiner code.
+//
+// Imagine that we operate on batches of size 3 and have the following input
+// data:
+//                        left                      right
+//          batch 1        -1          batch 1        -1
+//                         -1                         -2
+//                         -2                         -3
+//          batch 2        -2          batch 2        -3
+//                         -2                         -3
+//                         -2                         -3
+//          batch 3        -3          batch 3        -3
+//                         -3
+//                         -3
+//          batch 4        -3
+//
+// We start of with reading first batches from both inputs (we store them in
+// mjProberState) and performing the probing step. We find that we have a single
+// group such that it contains rows with indices [0, 2) on the left and with
+// indices [0, 1) on the right. This group is fully contained within the batch
+// (i.e. it is not a buffered group), so we'll be able to populate the cross
+// product during the build step. The last row with index 2 in the left batch
+// starts the left buffered group (we don't know yet whether the second batch
+// from the left will have a row with value -2 or not), so we will have to
+// process the last row as the buffered group; we append a single row with index
+// 1 from the right batch into the right buffered group.
+//
+// At this point the probing is done, and the merge joiner transitions into
+// mjBuildFromBatch. Here we take the single group ([0, 2) on the left and
+// [0, 1) on the right) to produce a cross product, so we add two result rows
+// to the output batch. We don't emit the output yet because we want to fill up
+// the output to capacity. The output is now {(-1, -1), (-1, -1)}.
+//
+// Since we have something in the buffered group state, we transition into
+// building from the buffered group. There, we first have to make sure that we
+// buffer all rows (into a spilling queue) that are part of the right buffered
+// group; however, in this case we have found the boundaries of the right group
+// during the probing, so we don't need to read any new batches - the right
+// buffered group only contains a single row with index 1. At the same time, the
+// left buffered group was started with a single row with index 2, so we are
+// ready to perform the cross product. This produces a single row that we put
+// into the output batch. Now the output is at capacity, so we emit {(-1, -1),
+// (-1, -1), (-2, -2)}, but the merge joiner remains in mjBuildFromBufferedGroup
+// state.
+//
+// When Next() is called again, we see that we have fully emitted the cross
+// product based on the current left batch, so we fetch the second batch from
+// the left in continueLeftBufferedGroup(). There, after fetching the second
+// batch we first check whether the first row of the batch still belongs to the
+// left buffered group (it does), and then we run the ordered distinct in order
+// to find the first row that is not part of the current buffered group. In this
+// case it turns out that all 3 rows still belong to the same buffered group
+// that have value -2 in their equality columns. We produce a cross product of 3
+// rows on the left and a single row on the right, put it into the output and
+// emit {(-2, -2), (-2, -2), (-2, -2)}. The merge joiner remains in
+// mjBuildFromBufferedGroup state.
+//
+// When Next() is called, we fetch the third batch from the left and find that
+// its first row (value -3) is different from the current left buffered group
+// (value -2), so we have finished processing the buffered group for value -2.
+// The merge joiner transitions into mjEntry state (where we don't do anything
+// since we already have the prober state set up) and then into mjProbe state.
+//
+// As a reminder, we working with a batch {(-3), (-3), (-3)} from the left and
+// {(-1), (-2), (-3)} from the right. Note that on the left we're starting from
+// index 0 and from index 2 on the right. During probing, we find that all 3
+// rows from the left and the row with index 2 on the right are part of the same
+// group which might extend into the next batches from either side, so we start
+// a buffered group on the left and append the single row into the right
+// buffered group.
+//
+// We don't have any groups fully contained within the probing batches to build
+// from, so we transition into building from the buffered group. There, we will
+// actually read the second and the third batches from the right and append all
+// of them into the spilling queue. The right input is now exhausted. We now
+// have 5 rows with value -3 in the spilling queue, and we proceed to build the
+// cross product with 3 rows from the left. This will result in 15 rows emitted
+// in 5 batches.
+//
+// Once we've emitted those 5 batches, we see that we have fully built the cross
+// product based on the third left batch, so we fetch the last fourth batch from
+// the left. It has a single row that is still part of the same group, so we
+// will build the cross product against 5 rows in the spilling queue of the
+// right buffered group.
+//
+// At this point, we have exhausted both inputs, and we're done.
+
+// mjState represents the state of the merge joiner.
+type mjState int
+
+const (
+	// mjEntry is the entry state of the merge joiner where all the batches and
+	// indices are properly set, regardless if Next was called the first time or
+	// the 1000th time. This state also routes into the correct state based on
+	// the prober state after setup.
+	mjEntry mjState = iota
+
+	// mjSourceFinished is the state in which one of the input sources has no
+	// more available batches, thus signaling that the joiner should begin
+	// wrapping up execution by outputting any remaining groups in state. After
+	// reaching this state, we can only build from the batch.
+	mjSourceFinished
+
+	// mjProbe is the main probing state in which the groups for the current
+	// batch are determined.
+	mjProbe
+
+	// mjBuildFromBatch indicates that we should be building from the current
+	// probing batches. Note that in such case we might have multiple groups to
+	// build.
+	mjBuildFromBatch
+
+	// mjBuildFromBufferedGroup indicates that we should be building from the
+	// current left batch and the right buffered group. Note that in such case
+	// we have at most one group to build and are building the output one batch
+	// from the left input at a time.
+	mjBuildFromBufferedGroup
+
+	// mjDone is the final state of the merge joiner in which it'll be returning
+	// only zero-length batches. In this state, the disk infrastructure is
+	// cleaned up.
+	mjDone
+)
+
 // group is an ADT representing a contiguous set of rows that match on their
 // equality columns.
 type group struct {
@@ -53,36 +207,14 @@ type group struct {
 	unmatched bool
 }
 
-// mjBuildFrom is an indicator of which source we're building the output from.
-type mjBuildFrom int
-
-const (
-	// mjBuildFromBatch indicates that we should be building from the current
-	// probing batches. Note that in such case we might have multiple groups to
-	// build.
-	mjBuildFromBatch mjBuildFrom = iota
-	// mjBuildFromBufferedGroup indicates that we should be building from the
-	// buffered group. Note that in such case we might have at most one group to
-	// build.
-	mjBuildFromBufferedGroup
-)
-
 // mjBuilderState contains all the state required to execute the build phase.
 type mjBuilderState struct {
-	buildFrom mjBuildFrom
-
 	// Fields to identify the groups in the input sources.
 	lGroups []group
 	rGroups []group
 
 	// outCount keeps record of the current number of rows in the output.
 	outCount int
-	// outFinished is used to determine if the builder is finished outputting
-	// the groups from input.
-	outFinished bool
-
-	totalOutCountFromBufferedGroup  int
-	alreadyEmittedFromBufferedGroup int
 
 	// Cross product materialization state.
 	left  mjBuilderCrossProductState
@@ -98,71 +230,46 @@ type mjBuilderCrossProductState struct {
 	numRepeatsIdx  int
 }
 
-// mjBufferedGroup is a helper struct that stores information about the tuples
-// from both inputs for the buffered group.
-type mjBufferedGroup struct {
-	// firstTuple stores a single tuple that was first in the buffered group.
-	firstTuple   []coldata.Vec
-	scratchBatch coldata.Batch
-}
-
 type mjBufferedGroupState struct {
-	// Local buffer for the last left and right groups which is used when the
-	// group ends with a batch and the group on each side needs to be saved to
-	// state in order to be able to continue it in the next batch.
-	left        mjBufferedGroup
-	right       mjBufferedGroup
-	helper      *crossJoinerBase
-	needToReset bool
+	// leftFirstTuple is the first tuple of the left buffered group. It is set
+	// only in case the left buffered group spans more than one input batch.
+	leftFirstTuple []coldata.Vec
+	// leftGroupStartIdx is the position within the current left batch where the
+	// left buffered group starts. If the group spans multiple batches, this
+	// will be set to 0 on all consecutive batches.
+	//
+	// Note that proberState.lIdx indicates the exclusive end position for the
+	// left buffered group within the current batch.
+	leftGroupStartIdx int
+	// leftBatchDone indicates whether the output from the current left batch
+	// has been fully built.
+	leftBatchDone bool
+	// rightFirstTuple is the first tuple of the right buffered group. It is set
+	// only in case the right buffered group spans more than one input batch.
+	rightFirstTuple []coldata.Vec
+	// scratchSel is a scratch selection vector initialized only when needed.
+	scratchSel []int
+
+	// helper is the building facility for the cross join of the buffered group.
+	helper *crossJoinerBase
 }
 
 // mjProberState contains all the state required to execute in the probing
 // phase.
 type mjProberState struct {
 	// Fields to save the "working" batches to state in between outputs.
-	lBatch  coldata.Batch
-	rBatch  coldata.Batch
+	lBatch coldata.Batch
+	rBatch coldata.Batch
+	// lIdx indicates the index of the first left tuple that hasn't been probed
+	// yet.
 	lIdx    int
 	lLength int
+	// rIdx indicates the index of the first right tuple that hasn't been probed
+	// yet.
 	rIdx    int
 	rLength int
 }
 
-// mjState represents the state of the merge joiner.
-type mjState int
-
-const (
-	// mjEntry is the entry state of the merge joiner where all the batches and
-	// indices are properly set, regardless if Next was called the first time or
-	// the 1000th time. This state also routes into the correct state based on
-	// the prober state after setup.
-	mjEntry mjState = iota
-
-	// mjSourceFinished is the state in which one of the input sources has no
-	// more available batches, thus signaling that the joiner should begin
-	// wrapping up execution by outputting any remaining groups in state.
-	mjSourceFinished
-
-	// mjFinishBufferedGroup is the state in which the previous state resulted in
-	// a group that ended with a batch. Such a group was buffered, and this state
-	// finishes that group and builds the output.
-	mjFinishBufferedGroup
-
-	// mjProbe is the main probing state in which the groups for the current
-	// batch are determined.
-	mjProbe
-
-	// mjBuild is the state in which the groups determined by the probing states
-	// are built, i.e. materialized to the output member by creating the cross
-	// product.
-	mjBuild
-
-	// mjDone is the final state of the merge joiner in which it'll be returning
-	// only zero-length batches. In this state, the disk infrastructure is
-	// cleaned up.
-	mjDone
-)
-
 type mergeJoinInput struct {
 	// eqCols specify the indices of the source table equality columns during the
 	// merge join.
@@ -194,21 +301,6 @@ type mergeJoinInput struct {
 	source colexecop.Operator
 }
 
-// The merge join operator uses a probe and build approach to generate the
-// join. What this means is that instead of going through and expanding the
-// cross product row by row, the operator performs two passes.
-// The first pass generates a list of groups of matching rows based on the
-// equality columns (where a "group" represents a contiguous set of rows that
-// match on the equality columns).
-// The second pass is where the groups and their associated cross products are
-// materialized into the full output.
-
-// Two buffers are used, one for the group on the left table and one for the
-// group on the right table. These buffers are only used if the group ends with
-// a batch, to make sure that we don't miss any cross product entries while
-// expanding the groups (leftGroups and rightGroups) when a group spans
-// multiple batches.
-
 // NewMergeJoinOp returns a new merge join operator with the given spec that
 // implements sort-merge join. It performs a merge on the left and right input
 // sources, based on the equality columns, assuming both inputs are in sorted
@@ -465,11 +557,9 @@ type mergeJoinBase struct {
 	right              mergeJoinInput
 
 	// Output buffer definition.
-	output      coldata.Batch
-	outputTypes []*types.T
-	// outputReady is a flag to indicate that merge joiner is ready to emit an
-	// output batch.
-	outputReady bool
+	output         coldata.Batch
+	outputCapacity int
+	outputTypes    []*types.T
 
 	// Local buffer for the "working" repeated groups.
 	groups circularGroupsBuffer
@@ -492,10 +582,8 @@ func (o *mergeJoinBase) Reset(ctx context.Context) {
 	if r, ok := o.right.source.(colexecop.Resetter); ok {
 		r.Reset(ctx)
 	}
-	o.outputReady = false
 	o.state = mjEntry
 	o.bufferedGroup.helper.Reset(ctx)
-	o.bufferedGroup.needToReset = false
 	o.proberState.lBatch = nil
 	o.proberState.rBatch = nil
 	o.resetBuilderCrossProductState()
@@ -506,10 +594,10 @@ func (o *mergeJoinBase) Init(ctx context.Context) {
 		return
 	}
 	o.outputTypes = o.joinType.MakeOutputTypes(o.left.sourceTypes, o.right.sourceTypes)
-	o.bufferedGroup.left.firstTuple = o.unlimitedAllocator.NewMemBatchWithFixedCapacity(
+	o.bufferedGroup.leftFirstTuple = o.unlimitedAllocator.NewMemBatchWithFixedCapacity(
 		o.left.sourceTypes, 1, /* capacity */
 	).ColVecs()
-	o.bufferedGroup.right.firstTuple = o.unlimitedAllocator.NewMemBatchWithFixedCapacity(
+	o.bufferedGroup.rightFirstTuple = o.unlimitedAllocator.NewMemBatchWithFixedCapacity(
 		o.right.sourceTypes, 1, /* capacity */
 	).ColVecs()
 	o.bufferedGroup.helper = newCrossJoinerBase(
@@ -532,54 +620,58 @@ func (o *mergeJoinBase) resetBuilderCrossProductState() {
 	o.builderState.right.reset()
 }
 
-// appendToBufferedGroup appends all the tuples from batch that are part of the
-// same group as the ones in the buffered group that corresponds to the input
-// source. This needs to happen when a group starts at the end of an input
+// startLeftBufferedGroup initializes the left buffered group. It will set the
+// first tuple in case the left buffered group doesn't end in the current left
+// batch.
+func (o *mergeJoinBase) startLeftBufferedGroup(sel []int, groupStartIdx int, groupLength int) {
+	if groupStartIdx+groupLength < o.proberState.lLength {
+		// The left buffered group is complete within the current left batch, so
+		// we don't need to copy the first tuple.
+		return
+	}
+	o.unlimitedAllocator.PerformOperation(o.bufferedGroup.leftFirstTuple, func() {
+		for colIdx := range o.left.sourceTypes {
+			o.bufferedGroup.leftFirstTuple[colIdx].Copy(
+				coldata.SliceArgs{
+					Src:         o.proberState.lBatch.ColVec(colIdx),
+					Sel:         sel,
+					DestIdx:     0,
+					SrcStartIdx: groupStartIdx,
+					SrcEndIdx:   groupStartIdx + 1,
+				},
+			)
+		}
+	})
+}
+
+// appendToRightBufferedGroup appends the tuples in
+// [groupStartIdx; groupStartIdx+groupLength) range from the current right
+// batch. This needs to happen when a group starts at the end of an input
 // batch and can continue into the following batches.
+//
 // A zero-length batch needs to be appended when no more batches will be
-// appended to the buffered group.
-func (o *mergeJoinBase) appendToBufferedGroup(
-	input *mergeJoinInput, batch coldata.Batch, sel []int, groupStartIdx int, groupLength int,
-) {
-	var (
-		bufferedGroup     *mjBufferedGroup
-		sourceTypes       []*types.T
-		bufferedTuples    *colexecutils.SpillingQueue
-		numBufferedTuples int
-	)
-	if input == &o.left {
-		sourceTypes = o.left.sourceTypes
-		bufferedGroup = &o.bufferedGroup.left
-		bufferedTuples = o.bufferedGroup.helper.left.tuples
-		numBufferedTuples = o.bufferedGroup.helper.left.numTuples
-		o.bufferedGroup.helper.left.numTuples += groupLength
-	} else {
-		sourceTypes = o.right.sourceTypes
-		bufferedGroup = &o.bufferedGroup.right
-		bufferedTuples = o.bufferedGroup.helper.right.tuples
-		numBufferedTuples = o.bufferedGroup.helper.right.numTuples
-		o.bufferedGroup.helper.right.numTuples += groupLength
-	}
-	if batch.Length() == 0 || groupLength == 0 {
+// appended to the buffered group (which can be achieved by specifying an empty
+// range with groupLength == 0).
+func (o *mergeJoinBase) appendToRightBufferedGroup(sel []int, groupStartIdx int, groupLength int) {
+	bufferedTuples := o.bufferedGroup.helper.rightTuples
+	if groupLength == 0 {
 		// We have finished appending to this buffered group, so we need to
 		// Enqueue a zero-length batch per the contract of the spilling queue.
 		bufferedTuples.Enqueue(o.Ctx, coldata.ZeroBatch)
 		return
 	}
-	// TODO(yuzefovich): for LEFT/RIGHT ANTI joins we only need to store the
-	// first tuple (in order to find the boundaries of the groups) since all
-	// of the buffered tuples do have a match and, thus, don't contribute to
-	// the output.
-	// TODO(yuzefovich): for INTERSECT/EXCEPT ALL joins we can buffer only
-	// tuples from the left side and count the number of tuples on the right.
-	// TODO(yuzefovich): for LEFT/RIGHT SEMI joins we only need to buffer tuples
-	// from one side (left/right respectively).
-	if numBufferedTuples == 0 {
-		o.unlimitedAllocator.PerformOperation(bufferedGroup.firstTuple, func() {
+	sourceTypes := o.right.sourceTypes
+	numBufferedTuples := o.bufferedGroup.helper.numRightTuples
+	o.bufferedGroup.helper.numRightTuples += groupLength
+	if numBufferedTuples == 0 && groupStartIdx+groupLength == o.proberState.rLength {
+		// Set the right first tuple only if this is the first call to this
+		// method for the current right buffered group and if the group doesn't
+		// end in the current batch.
+		o.unlimitedAllocator.PerformOperation(o.bufferedGroup.rightFirstTuple, func() {
 			for colIdx := range sourceTypes {
-				bufferedGroup.firstTuple[colIdx].Copy(
+				o.bufferedGroup.rightFirstTuple[colIdx].Copy(
 					coldata.SliceArgs{
-						Src:         batch.ColVec(colIdx),
+						Src:         o.proberState.rBatch.ColVec(colIdx),
 						Sel:         sel,
 						DestIdx:     0,
 						SrcStartIdx: groupStartIdx,
@@ -590,101 +682,156 @@ func (o *mergeJoinBase) appendToBufferedGroup(
 		})
 	}
 
-	// We don't impose any memory limits on the scratch batch because we rely on
-	// the inputs to the merge joiner to produce reasonably sized batches.
-	const maxBatchMemSize = math.MaxInt64
-	bufferedGroup.scratchBatch, _ = o.unlimitedAllocator.ResetMaybeReallocate(
-		input.sourceTypes, bufferedGroup.scratchBatch, groupLength, maxBatchMemSize,
-	)
-	o.unlimitedAllocator.PerformOperation(bufferedGroup.scratchBatch.ColVecs(), func() {
-		for colIdx := range input.sourceTypes {
-			bufferedGroup.scratchBatch.ColVec(colIdx).Copy(
-				coldata.SliceArgs{
-					Src:         batch.ColVec(colIdx),
-					Sel:         sel,
-					DestIdx:     0,
-					SrcStartIdx: groupStartIdx,
-					SrcEndIdx:   groupStartIdx + groupLength,
-				},
+	// TODO(yuzefovich): check whether it's worth templating this method out as
+	// well as having join-type-specific crossJoinerBase.
+	switch o.joinType {
+	case descpb.LeftSemiJoin, descpb.RightAntiJoin:
+		// For LEFT SEMI and RIGHT ANTI joins we only need to store the first
+		// tuple (in order to find the boundaries of the groups) since all of
+		// the buffered tuples don't/do have a match and, thus, do/don't
+		// contribute to the output.
+		return
+	case descpb.IntersectAllJoin, descpb.ExceptAllJoin:
+		// For INTERSECT/EXCEPT ALL joins we only need the number of tuples on
+		// the right side (which we have already updated above).
+		return
+	}
+
+	// Update the selection on the probing batch to only include tuples from the
+	// buffered group.
+	rBatch, rLength := o.proberState.rBatch, o.proberState.rLength
+	rSel := rBatch.Selection()
+	rBatchHasSel := rSel != nil
+	// No need to modify the batch if the whole batch is part of the buffered
+	// group.
+	needToModify := groupStartIdx != 0 || groupLength != rLength
+	if needToModify {
+		if rBatchHasSel {
+			// Since rBatch already has a selection vector which we'll be
+			// modifying, we need to copy the original.
+			o.bufferedGroup.scratchSel = colexecutils.EnsureSelectionVectorLength(o.bufferedGroup.scratchSel, rLength)
+			copy(o.bufferedGroup.scratchSel, rSel)
+			// Now we need to shift elements in range
+			// [groupStartIdx; groupStartIdx+groupLength) to the beginning of
+			// the selection vector and then update the length of the batch
+			// accordingly.
+			copy(rSel[:groupLength], rSel[groupStartIdx:groupStartIdx+groupLength])
+			rBatch.SetLength(groupLength)
+		} else {
+			// Since rBatch doesn't have a selection vector, we will set the
+			// selection vector to include tuples in range
+			// [groupStartIdx; groupStartIdx+groupLength).
+			colexecutils.UpdateBatchState(
+				rBatch, groupLength, true, /* usesSel */
+				colexecutils.DefaultSelectionVector[groupStartIdx:groupStartIdx+groupLength],
 			)
 		}
-		bufferedGroup.scratchBatch.SetLength(groupLength)
-	})
-	bufferedTuples.Enqueue(o.Ctx, bufferedGroup.scratchBatch)
-}
+	}
 
-// setBuilderSourceToBatch sets the builder state to use groups from the
-// circular group buffer and the batches from input. This happens when we have
-// groups that are fully contained within a single input batch from each of the
-// sources.
-func (o *mergeJoinBase) setBuilderSourceToBatch() {
-	o.builderState.lGroups, o.builderState.rGroups = o.groups.getGroups()
-	o.builderState.buildFrom = mjBuildFromBatch
-}
+	bufferedTuples.Enqueue(o.Ctx, rBatch)
 
-// initProberState sets the batches, lengths, and current indices to the right
-// locations given the last iteration of the operator.
-func (o *mergeJoinBase) initProberState() {
-	// If this is the first batch or we're done with the current batch, get the
-	// next batch.
-	if o.proberState.lBatch == nil || (o.proberState.lLength != 0 && o.proberState.lIdx == o.proberState.lLength) {
-		o.proberState.lIdx, o.proberState.lBatch = 0, o.left.source.Next()
-		o.proberState.lLength = o.proberState.lBatch.Length()
-	}
-	if o.proberState.rBatch == nil || (o.proberState.rLength != 0 && o.proberState.rIdx == o.proberState.rLength) {
-		o.proberState.rIdx, o.proberState.rBatch = 0, o.right.source.Next()
-		o.proberState.rLength = o.proberState.rBatch.Length()
-	}
-	if o.bufferedGroup.needToReset {
-		o.bufferedGroup.helper.Reset(o.Ctx)
-		o.bufferedGroup.needToReset = false
+	// If we had to modify the batch, then restore the original state now.
+	if needToModify {
+		colexecutils.UpdateBatchState(
+			rBatch, rLength, rBatchHasSel, o.bufferedGroup.scratchSel,
+		)
 	}
 }
 
-// nonEmptyBufferedGroup returns true if there is a buffered group that needs
-// to be finished.
-func (o *mergeJoinBase) nonEmptyBufferedGroup() bool {
-	return o.bufferedGroup.helper.left.numTuples > 0 || o.bufferedGroup.helper.right.numTuples > 0
-}
-
 // sourceFinished returns true if either of input sources has no more rows.
 func (o *mergeJoinBase) sourceFinished() bool {
 	return o.proberState.lLength == 0 || o.proberState.rLength == 0
 }
 
-// finishBufferedGroup appends a zero-length batch to the buffered group which
-// is required by the contract of the spilling queue.
-func (o *mergeJoinBase) finishBufferedGroup(input *mergeJoinInput) {
-	o.appendToBufferedGroup(
-		input, coldata.ZeroBatch, nil, /* sel */
-		0 /* groupStartIdx */, 0, /* groupLength */
+// continueLeftBufferedGroup fetches the next batch from the left input and
+// and updates the probing and buffered group states accordingly.
+func (o *mergeJoinBase) continueLeftBufferedGroup() {
+	// Get the next batch from the left.
+	o.proberState.lIdx, o.proberState.lBatch = 0, o.left.source.Next()
+	o.proberState.lLength = o.proberState.lBatch.Length()
+	o.bufferedGroup.leftGroupStartIdx = 0
+	if o.proberState.lLength == 0 {
+		// The left input has been fully exhausted.
+		return
+	}
+	// Check whether the first tuple of this batch is still part of the left
+	// buffered group.
+	if o.isBufferedGroupFinished(&o.left, o.bufferedGroup.leftFirstTuple, o.proberState.lBatch, 0 /* rowIdx */) {
+		return
+	}
+
+	// It is ok that we might call Init() multiple times - it'll be a noop after
+	// the first one.
+	o.left.distincter.Init(o.Ctx)
+	o.left.distincter.(colexecop.Resetter).Reset(o.Ctx)
+	// Ignore the first row of the distincter since we already know that we are
+	// in the same group and, thus, the row is not distinct, regardless of what
+	// the distincter outputs.
+	groupLength := 1
+	var sel []int
+	o.left.distincterInput.SetBatch(o.proberState.lBatch)
+	o.left.distincter.Next()
+
+	sel = o.proberState.lBatch.Selection()
+	if sel != nil {
+		for ; groupLength < o.proberState.lLength; groupLength++ {
+			if o.left.distinctOutput[sel[groupLength]] {
+				// We found the beginning of a new group!
+				break
+			}
+		}
+	} else {
+		for ; groupLength < o.proberState.lLength; groupLength++ {
+			if o.left.distinctOutput[groupLength] {
+				// We found the beginning of a new group!
+				break
+			}
+		}
+	}
+
+	// Zero out the distinct output for the next time we use the distincter on
+	// the left input.
+	copy(o.left.distinctOutput[:o.proberState.lLength], colexecutils.ZeroBoolColumn)
+	o.proberState.lIdx += groupLength
+}
+
+// finishRightBufferedGroup appends a zero-length batch to the right buffered
+// group which is required by the contract of the spilling queue. Note that it
+// is safe to call this method multiple times (only the first one is not a
+// noop).
+func (o *mergeJoinBase) finishRightBufferedGroup() {
+	o.appendToRightBufferedGroup(
+		nil /* sel */, 0 /* groupStartIdx */, 0, /* groupLength */
 	)
 }
 
-// completeBufferedGroup extends the buffered group corresponding to input.
-// First, we check that the first row in batch is still part of the same group.
-// If this is the case, we use the Distinct operator to find the first
-// occurrence in batch (or subsequent batches) that doesn't match the current
-// group.
+// completeRightBufferedGroup extends the right buffered group. It will read all
+// tuples from the right input that are part of the current right buffered group
+// (which must have been initialized via appendToRightBufferedGroup).
+//
 // NOTE: we will be buffering all batches until we find such non-matching tuple
-// (or until we exhaust the input).
-// TODO(yuzefovich): this can be refactored so that only the right side does
-// unbounded buffering.
-// SIDE EFFECT: can append to the buffered group corresponding to the source.
-func (o *mergeJoinBase) completeBufferedGroup(
-	input *mergeJoinInput, batch coldata.Batch, rowIdx int,
-) (_ coldata.Batch, idx int, batchLength int) {
-	batchLength = batch.Length()
-	if o.isBufferedGroupFinished(input, batch, rowIdx) {
-		o.finishBufferedGroup(input)
-		return batch, rowIdx, batchLength
+// (or until we exhaust the right input).
+func (o *mergeJoinBase) completeRightBufferedGroup() {
+	// Get the next batch from the right.
+	o.proberState.rIdx, o.proberState.rBatch = 0, o.right.source.Next()
+	o.proberState.rLength = o.proberState.rBatch.Length()
+	// The right input has been fully exhausted.
+	if o.proberState.rLength == 0 {
+		o.finishRightBufferedGroup()
+		return
+	}
+	// Check whether the first tuple of this batch is still part of the right
+	// buffered group.
+	if o.isBufferedGroupFinished(&o.right, o.bufferedGroup.rightFirstTuple, o.proberState.rBatch, 0 /* rowIdx */) {
+		o.finishRightBufferedGroup()
+		return
 	}
 
 	isBufferedGroupComplete := false
 	// It is ok that we might call Init() multiple times - it'll be a noop after
 	// the first one.
-	input.distincter.Init(o.Ctx)
-	input.distincter.(colexecop.Resetter).Reset(o.Ctx)
+	o.right.distincter.Init(o.Ctx)
+	o.right.distincter.(colexecop.Resetter).Reset(o.Ctx)
 	// Ignore the first row of the distincter in the first pass since we already
 	// know that we are in the same group and, thus, the row is not distinct,
 	// regardless of what the distincter outputs.
@@ -696,22 +843,22 @@ func (o *mergeJoinBase) completeBufferedGroup(
 		// previous iterations had only the matching tuples to the buffered group,
 		// so the distincter - in a sense - compares the incoming tuples to the
 		// first tuple of the first iteration (which we know is the same group).
-		input.distincterInput.SetBatch(batch)
-		input.distincter.Next()
+		o.right.distincterInput.SetBatch(o.proberState.rBatch)
+		o.right.distincter.Next()
 
-		sel = batch.Selection()
+		sel = o.proberState.rBatch.Selection()
 		var groupLength int
 		if sel != nil {
-			for groupLength = loopStartIndex; groupLength < batchLength; groupLength++ {
-				if input.distinctOutput[sel[groupLength]] {
+			for groupLength = loopStartIndex; groupLength < o.proberState.rLength; groupLength++ {
+				if o.right.distinctOutput[sel[groupLength]] {
 					// We found the beginning of a new group!
 					isBufferedGroupComplete = true
 					break
 				}
 			}
 		} else {
-			for groupLength = loopStartIndex; groupLength < batchLength; groupLength++ {
-				if input.distinctOutput[groupLength] {
+			for groupLength = loopStartIndex; groupLength < o.proberState.rLength; groupLength++ {
+				if o.right.distinctOutput[groupLength] {
 					// We found the beginning of a new group!
 					isBufferedGroupComplete = true
 					break
@@ -720,42 +867,26 @@ func (o *mergeJoinBase) completeBufferedGroup(
 		}
 
 		// Zero out the distinct output for the next pass.
-		copy(input.distinctOutput[:batchLength], colexecutils.ZeroBoolColumn)
+		copy(o.right.distinctOutput[:o.proberState.rLength], colexecutils.ZeroBoolColumn)
 		loopStartIndex = 0
 
 		// Buffer all the tuples that are part of the buffered group.
-		o.appendToBufferedGroup(input, batch, sel, rowIdx, groupLength)
-		rowIdx += groupLength
+		o.appendToRightBufferedGroup(sel, o.proberState.rIdx, groupLength)
+		o.proberState.rIdx += groupLength
 
 		if !isBufferedGroupComplete {
 			// The buffered group is still not complete which means that we have
 			// just appended all the tuples from batch to it, so we need to get a
 			// fresh batch from the input.
-			rowIdx, batch = 0, input.source.Next()
-			batchLength = batch.Length()
-			if batchLength == 0 {
+			o.proberState.rIdx, o.proberState.rBatch = 0, o.right.source.Next()
+			o.proberState.rLength = o.proberState.rBatch.Length()
+			if o.proberState.rLength == 0 {
 				// The input has been exhausted, so the buffered group is now complete.
 				isBufferedGroupComplete = true
-				o.finishBufferedGroup(input)
 			}
 		}
 	}
-
-	return batch, rowIdx, batchLength
-}
-
-// finishProbe completes the buffered groups on both sides of the input.
-func (o *mergeJoinBase) finishProbe() {
-	o.proberState.lBatch, o.proberState.lIdx, o.proberState.lLength = o.completeBufferedGroup(
-		&o.left,
-		o.proberState.lBatch,
-		o.proberState.lIdx,
-	)
-	o.proberState.rBatch, o.proberState.rIdx, o.proberState.rLength = o.completeBufferedGroup(
-		&o.right,
-		o.proberState.rBatch,
-		o.proberState.rIdx,
-	)
+	o.finishRightBufferedGroup()
 }
 
 func (o *mergeJoinBase) Close() error {
diff --git a/pkg/sql/colexec/colexecjoin/mergejoiner_exceptall.eg.go b/pkg/sql/colexec/colexecjoin/mergejoiner_exceptall.eg.go
index db453acee5f5..86e91620a528 100644
--- a/pkg/sql/colexec/colexecjoin/mergejoiner_exceptall.eg.go
+++ b/pkg/sql/colexec/colexecjoin/mergejoiner_exceptall.eg.go
@@ -48,13 +48,13 @@ var _ colexecop.Operator = &mergeJoinExceptAllOp{}
 func (o *mergeJoinExceptAllOp) probeBodyLSeltrueRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -77,19 +77,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -214,19 +212,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -289,7 +287,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -299,10 +297,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -324,19 +327,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -437,19 +438,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -504,7 +505,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -514,10 +515,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -539,19 +545,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -652,19 +656,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -719,7 +723,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -729,10 +733,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -753,19 +762,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -899,19 +906,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -977,7 +984,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -987,10 +994,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -1008,19 +1020,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -1154,19 +1164,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -1232,7 +1242,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -1242,10 +1252,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -1264,19 +1279,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -1410,19 +1423,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -1488,7 +1501,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -1498,10 +1511,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -1523,19 +1541,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -1693,19 +1709,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -1779,7 +1795,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -1789,10 +1805,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -1814,19 +1835,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -1948,19 +1967,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -2022,7 +2041,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -2032,10 +2051,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -2057,19 +2081,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -2170,19 +2192,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -2237,7 +2259,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -2247,10 +2269,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -2272,19 +2299,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -2403,19 +2428,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -2476,7 +2501,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -2486,10 +2511,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -2511,19 +2541,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -2630,19 +2658,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -2699,7 +2727,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -2709,10 +2737,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -2727,13 +2760,13 @@ EqLoop:
 func (o *mergeJoinExceptAllOp) probeBodyLSeltrueRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -2756,19 +2789,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -2893,19 +2924,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -2968,7 +2999,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -2978,10 +3009,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -3003,19 +3039,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -3116,19 +3150,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -3183,7 +3217,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -3193,10 +3227,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -3218,19 +3257,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -3331,19 +3368,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -3398,7 +3435,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -3408,10 +3445,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -3432,19 +3474,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -3578,19 +3618,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -3656,7 +3696,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -3666,10 +3706,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -3687,19 +3732,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -3833,19 +3876,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -3911,7 +3954,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -3921,10 +3964,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -3943,19 +3991,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -4089,19 +4135,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -4167,7 +4213,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -4177,10 +4223,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -4202,19 +4253,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -4372,19 +4421,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -4458,7 +4507,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -4468,10 +4517,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -4493,19 +4547,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -4627,19 +4679,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -4701,7 +4753,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -4711,10 +4763,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -4736,19 +4793,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -4849,19 +4904,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -4916,7 +4971,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -4926,10 +4981,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -4951,19 +5011,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -5082,19 +5140,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -5155,7 +5213,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -5165,10 +5223,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -5190,19 +5253,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -5309,19 +5370,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -5378,7 +5439,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -5388,10 +5449,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -5406,13 +5472,13 @@ EqLoop:
 func (o *mergeJoinExceptAllOp) probeBodyLSelfalseRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -5435,19 +5501,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -5572,19 +5636,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -5647,7 +5711,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -5657,10 +5721,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -5682,19 +5751,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -5795,19 +5862,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -5862,7 +5929,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -5872,10 +5939,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -5897,19 +5969,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -6010,19 +6080,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -6077,7 +6147,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -6087,10 +6157,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -6111,19 +6186,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -6257,19 +6330,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -6335,7 +6408,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -6345,10 +6418,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -6366,19 +6444,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -6512,19 +6588,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -6590,7 +6666,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -6600,10 +6676,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -6622,19 +6703,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -6768,19 +6847,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -6846,7 +6925,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -6856,10 +6935,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -6881,19 +6965,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -7051,19 +7133,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -7137,7 +7219,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -7147,10 +7229,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -7172,19 +7259,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -7306,19 +7391,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -7380,7 +7465,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -7390,10 +7475,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -7415,19 +7505,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -7528,19 +7616,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -7595,7 +7683,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -7605,10 +7693,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -7630,19 +7723,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -7761,19 +7852,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -7834,7 +7925,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -7844,10 +7935,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -7869,19 +7965,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -7988,19 +8082,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -8057,7 +8151,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -8067,10 +8161,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -8085,13 +8184,13 @@ EqLoop:
 func (o *mergeJoinExceptAllOp) probeBodyLSelfalseRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -8114,19 +8213,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -8251,19 +8348,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -8326,7 +8423,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -8336,10 +8433,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -8361,19 +8463,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -8474,19 +8574,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -8541,7 +8641,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -8551,10 +8651,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -8576,19 +8681,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -8689,19 +8792,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -8756,7 +8859,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -8766,10 +8869,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -8790,19 +8898,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -8936,19 +9042,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -9014,7 +9120,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -9024,10 +9130,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -9045,19 +9156,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -9191,19 +9300,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -9269,7 +9378,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -9279,10 +9388,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -9301,19 +9415,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -9447,19 +9559,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -9525,7 +9637,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -9535,10 +9647,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -9560,19 +9677,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -9730,19 +9845,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -9816,7 +9931,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -9826,10 +9941,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -9851,19 +9971,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -9985,19 +10103,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -10059,7 +10177,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -10069,10 +10187,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -10094,19 +10217,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -10207,19 +10328,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -10274,7 +10395,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -10284,10 +10405,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -10309,19 +10435,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -10440,19 +10564,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -10513,7 +10637,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -10523,10 +10647,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -10548,19 +10677,17 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -10667,19 +10794,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
@@ -10736,7 +10863,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -10746,10 +10873,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -10790,7 +10922,6 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[:len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		LeftColLoop:
 			for colIdx := range input.sourceTypes {
@@ -10838,8 +10969,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10903,8 +11034,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10968,8 +11099,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11032,8 +11163,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11093,8 +11224,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11155,8 +11286,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11220,8 +11351,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11285,8 +11416,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11350,8 +11481,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11415,8 +11546,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11480,8 +11611,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11554,8 +11685,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11618,8 +11749,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11682,8 +11813,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11745,8 +11876,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11805,8 +11936,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11866,8 +11997,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11930,8 +12061,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11994,8 +12125,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12058,8 +12189,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12122,8 +12253,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12186,8 +12317,8 @@ func (o *mergeJoinExceptAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12258,7 +12389,6 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[colOffset:colOffset+len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		RightColLoop:
 			for colIdx := range input.sourceTypes {
@@ -12294,8 +12424,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12361,8 +12491,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12428,8 +12558,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12494,8 +12624,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12557,8 +12687,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12621,8 +12751,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12688,8 +12818,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12755,8 +12885,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12822,8 +12952,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12889,8 +13019,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12956,8 +13086,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -13033,8 +13163,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -13100,8 +13230,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -13167,8 +13297,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -13233,8 +13363,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -13296,8 +13426,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -13360,8 +13490,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -13427,8 +13557,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -13494,8 +13624,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -13561,8 +13691,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -13628,8 +13758,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -13695,8 +13825,8 @@ func (o *mergeJoinExceptAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -13780,20 +13910,6 @@ func (o *mergeJoinExceptAllOp) probe() {
 	}
 }
 
-// setBuilderSourceToBufferedGroup sets up the builder state to use the
-// buffered group.
-func (o *mergeJoinExceptAllOp) setBuilderSourceToBufferedGroup() {
-	o.builderState.buildFrom = mjBuildFromBufferedGroup
-	o.bufferedGroup.helper.setupBuilder()
-	o.builderState.totalOutCountFromBufferedGroup = o.bufferedGroup.helper.calculateOutputCount()
-	o.builderState.alreadyEmittedFromBufferedGroup = 0
-
-	// We cannot yet reset the buffered groups because the builder will be taking
-	// input from them. The actual reset will take place on the next call to
-	// initProberState().
-	o.bufferedGroup.needToReset = true
-}
-
 // exhaustLeftSource sets up the builder to process any remaining tuples from
 // the left source. It should only be called when the right source has been
 // exhausted.
@@ -13820,14 +13936,13 @@ func (o *mergeJoinExceptAllOp) exhaustRightSource() {
 	// ignored in all joins except for RIGHT OUTER and FULL OUTER.
 }
 
-// calculateOutputCount uses the toBuild field of each group and the output
-// batch size to determine the output count. Note that as soon as a group is
+// numBuiltFromBatch uses the toBuild field of each group and the output
+// capacity to determine the output count. Note that as soon as a group is
 // materialized partially or fully to output, its toBuild field is updated
-// accordingly.
-func (o *mergeJoinExceptAllOp) calculateOutputCount(groups []group) int {
-	count := o.builderState.outCount
-	outputCapacity := o.output.Capacity()
-
+// accordingly. The number of tuples that will be built from batch during the
+// current iteration is returned.
+func (o *mergeJoinExceptAllOp) numBuiltFromBatch(groups []group) (numBuilt int) {
+	outCount := o.builderState.outCount
 	for i := 0; i < len(groups); i++ {
 		if !groups[i].unmatched {
 			// "Matched" groups are not outputted in LEFT ANTI, RIGHT ANTI,
@@ -13835,51 +13950,152 @@ func (o *mergeJoinExceptAllOp) calculateOutputCount(groups []group) int {
 			// they do not contribute to the output count.
 			continue
 		}
-		count += groups[i].toBuild
+		outCount += groups[i].toBuild
 		groups[i].toBuild = 0
-		if count > o.output.Capacity() {
-			groups[i].toBuild = count - outputCapacity
-			count = outputCapacity
-			return count
+		if outCount > o.outputCapacity {
+			groups[i].toBuild = outCount - o.outputCapacity
+			return o.outputCapacity - o.builderState.outCount
 		}
 	}
-	o.builderState.outFinished = true
-	return count
+	return outCount - o.builderState.outCount
 }
 
-// build creates the cross product, and writes it to the output member.
-func (o *mergeJoinExceptAllOp) build() {
+// buildFromBatch builds as many output rows as possible from the groups that
+// were complete in the probing batches. New rows are put starting at
+// o.builderState.outCount position until either the capacity is reached or all
+// groups are processed.
+func (o *mergeJoinExceptAllOp) buildFromBatch() {
 	outStartIdx := o.builderState.outCount
-	switch o.builderState.buildFrom {
-	case mjBuildFromBatch:
-		o.builderState.outCount = o.calculateOutputCount(o.builderState.lGroups)
-		if o.output.Width() != 0 && o.builderState.outCount > outStartIdx {
-			// We will be actually building the output if we have columns in the output
-			// batch (meaning that we're not doing query like 'SELECT count(*) ...')
-			// and when builderState.outCount has increased (meaning that we have
-			// something to build).
-			colOffsetForRightGroups := 0
-			o.buildLeftGroupsFromBatch(o.builderState.lGroups, &o.left, o.proberState.lBatch, outStartIdx)
-			colOffsetForRightGroups = len(o.left.sourceTypes)
-			_ = colOffsetForRightGroups
-		}
+	numBuilt := o.numBuiltFromBatch(o.builderState.lGroups)
+	o.builderState.outCount += numBuilt
+	if numBuilt > 0 && len(o.outputTypes) != 0 {
+		// We will be actually building the output if we have columns in the output
+		// batch (meaning that we're not doing query like 'SELECT count(*) ...')
+		// and when builderState.outCount has increased (meaning that we have
+		// something to build).
+		colOffsetForRightGroups := 0
+		o.buildLeftGroupsFromBatch(o.builderState.lGroups, &o.left, o.proberState.lBatch, outStartIdx)
+		colOffsetForRightGroups = len(o.left.sourceTypes)
+		_ = colOffsetForRightGroups
+	}
+}
+
+// transitionIntoBuildingFromBufferedGroup should be called once we have
+// non-empty right buffered group in order to setup the buffered group builder.
+// It will complete the right buffered group (meaning it'll read all batches
+// from the right input until either the new group is found or the input is
+// exhausted).
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinExceptAllOp) transitionIntoBuildingFromBufferedGroup() {
+	if o.proberState.rIdx == o.proberState.rLength {
+		// The right buffered group might extend into the next batch, so we have
+		// to complete it first.
+		o.completeRightBufferedGroup()
+	}
+
+	o.bufferedGroup.helper.setupLeftBuilder()
 
-	case mjBuildFromBufferedGroup:
-		willEmit := o.builderState.totalOutCountFromBufferedGroup - o.builderState.alreadyEmittedFromBufferedGroup
-		if o.builderState.outCount+willEmit > o.output.Capacity() {
-			willEmit = o.output.Capacity() - o.builderState.outCount
+	// For EXCEPT ALL joins we build # left tuples - # right tuples output rows
+	// (if positive), so we have to discard first numRightTuples rows from the
+	// left.
+	numSkippedLeft := 0
+	for {
+		groupLength := o.proberState.lIdx - o.bufferedGroup.leftGroupStartIdx
+		if numSkippedLeft+groupLength > o.bufferedGroup.helper.numRightTuples {
+			// The current left batch is the first one that contains tuples
+			// without a "match".
+			break
+		}
+		numSkippedLeft += groupLength
+		var groupFinished bool
+		if o.proberState.lIdx < o.proberState.lLength {
+			// The group on the left is finished within the current left
+			// batch.
+			groupFinished = true
 		} else {
-			o.builderState.outFinished = true
+			// Fetch the next batch from the left input and calculate the
+			// boundaries of the buffered group.
+			o.continueLeftBufferedGroup()
+			groupFinished = o.proberState.lIdx == 0
 		}
-		o.builderState.outCount += willEmit
-		o.builderState.alreadyEmittedFromBufferedGroup += willEmit
-		if o.output.Width() != 0 && willEmit > 0 {
-			o.bufferedGroup.helper.buildFromLeftInput(o.Ctx, outStartIdx)
+		if groupFinished {
+			// We have less matching tuples on the left than on the right, so we
+			// don't emit any output for this buffered group.
+			o.bufferedGroup.helper.Reset(o.Ctx)
+			o.state = mjEntry
+			return
 		}
+	}
+	// We might need to skip some tuples in the current left batch since they
+	// still had matches with the right side.
+	toSkipInThisBatch := o.bufferedGroup.helper.numRightTuples - numSkippedLeft
+	startIdx := o.bufferedGroup.leftGroupStartIdx + toSkipInThisBatch
 
-	default:
-		colexecerror.InternalError(errors.AssertionFailedf("unsupported mjBuildFrom %d", o.builderState.buildFrom))
+	o.bufferedGroup.helper.prepareForNextLeftBatch(o.proberState.lBatch, startIdx, o.proberState.lIdx)
+	o.state = mjBuildFromBufferedGroup
+}
+
+// buildFromBufferedGroup builds the output based on the current buffered group
+// and puts new tuples starting at position b.builderState.outCount. It returns
+// true once the output for the buffered group has been fully populated.
+// It is assumed that transitionIntoBuildingFromBufferedGroup has been called.
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinExceptAllOp) buildFromBufferedGroup() (bufferedGroupComplete bool) {
+	bg := &o.bufferedGroup
+	// Iterate until either we use up the whole capacity of the output batch or
+	// we complete the buffered group.
+	for {
+		if bg.helper.builderState.left.curSrcStartIdx == o.proberState.lLength {
+			// The output has been fully built from the current left batch.
+			bg.leftBatchDone = true
+		}
+		if bg.leftBatchDone {
+			// The current left batch has been fully processed with regards to
+			// the buffered group.
+			bg.leftBatchDone = false
+			if o.proberState.lIdx < o.proberState.lLength {
+				// The group on the left is finished within the current left
+				// batch.
+				return true
+			}
+			var skipLeftBufferedGroup bool
+			if skipLeftBufferedGroup {
+				// Keep fetching the next batch from the left input until we
+				// either find the start of the new group or we exhaust the
+				// input.
+				for o.proberState.lIdx == o.proberState.lLength && o.proberState.lLength > 0 {
+					o.continueLeftBufferedGroup()
+				}
+				return true
+			}
+			// Fetch the next batch from the left input and calculate the
+			// boundaries of the buffered group.
+			o.continueLeftBufferedGroup()
+			if o.proberState.lIdx == 0 {
+				return true
+			}
+			bg.helper.prepareForNextLeftBatch(
+				o.proberState.lBatch, bg.leftGroupStartIdx, o.proberState.lIdx,
+			)
+		}
 
+		willEmit := bg.helper.canEmit()
+		if o.builderState.outCount+willEmit > o.outputCapacity {
+			willEmit = o.outputCapacity - o.builderState.outCount
+		} else {
+			bg.leftBatchDone = true
+		}
+		if willEmit > 0 && len(o.outputTypes) != 0 {
+			bg.helper.buildFromLeftInput(o.Ctx, o.builderState.outCount)
+		}
+		o.builderState.outCount += willEmit
+		bg.helper.builderState.numEmittedCurLeftBatch += willEmit
+		bg.helper.builderState.numEmittedTotal += willEmit
+		if o.builderState.outCount == o.outputCapacity {
+			return false
+		}
 	}
 }
 
@@ -13887,30 +14103,29 @@ func (o *mergeJoinExceptAllOp) Next() coldata.Batch {
 	o.output, _ = o.unlimitedAllocator.ResetMaybeReallocate(
 		o.outputTypes, o.output, 1 /* minDesiredCapacity */, o.memoryLimit,
 	)
+	o.outputCapacity = o.output.Capacity()
 	o.bufferedGroup.helper.output = o.output
+	o.builderState.outCount = 0
 	for {
 		switch o.state {
 		case mjEntry:
-			o.initProberState()
-
-			if o.nonEmptyBufferedGroup() {
-				o.state = mjFinishBufferedGroup
-				break
+			// If this is the first batch or we're done with the current batch,
+			// get the next batch.
+			if o.proberState.lBatch == nil || (o.proberState.lLength != 0 && o.proberState.lIdx == o.proberState.lLength) {
+				o.proberState.lIdx, o.proberState.lBatch = 0, o.left.source.Next()
+				o.proberState.lLength = o.proberState.lBatch.Length()
+			}
+			if o.proberState.rBatch == nil || (o.proberState.rLength != 0 && o.proberState.rIdx == o.proberState.rLength) {
+				o.proberState.rIdx, o.proberState.rBatch = 0, o.right.source.Next()
+				o.proberState.rLength = o.proberState.rBatch.Length()
 			}
-
 			if o.sourceFinished() {
 				o.state = mjSourceFinished
 				break
 			}
-
 			o.state = mjProbe
+
 		case mjSourceFinished:
-			o.outputReady = true
-			o.builderState.buildFrom = mjBuildFromBatch
-			// Next, we need to make sure that builder state is set up for a case when
-			// neither exhaustLeftSource nor exhaustRightSource is called below. In such
-			// scenario the merge joiner is done, so it'll be outputting zero-length
-			// batches from now on.
 			o.builderState.lGroups = o.builderState.lGroups[:0]
 			o.builderState.rGroups = o.builderState.rGroups[:0]
 			// At least one of the sources is finished. If it was the right one,
@@ -13919,49 +14134,51 @@ func (o *mergeJoinExceptAllOp) Next() coldata.Batch {
 			// finished, then there is nothing left to do.
 			if o.proberState.lIdx < o.proberState.lLength {
 				o.exhaustLeftSource()
-				// We unset o.outputReady here because we want to put as many unmatched
-				// tuples from the left into the output batch. Once outCount reaches the
-				// desired output batch size, the output will be returned.
-				o.outputReady = false
 			}
-			o.state = mjBuild
-		case mjFinishBufferedGroup:
-			o.finishProbe()
-			o.setBuilderSourceToBufferedGroup()
-			o.state = mjBuild
+			if len(o.builderState.lGroups) == 0 && len(o.builderState.rGroups) == 0 {
+				o.state = mjDone
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
+			}
+			o.state = mjBuildFromBatch
+
 		case mjProbe:
 			o.probe()
-			o.setBuilderSourceToBatch()
-			o.state = mjBuild
-		case mjBuild:
-			o.build()
-
-			if o.builderState.outFinished {
+			o.builderState.lGroups, o.builderState.rGroups = o.groups.getGroups()
+			if len(o.builderState.lGroups) > 0 || len(o.builderState.rGroups) > 0 {
+				o.state = mjBuildFromBatch
+			} else if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
 				o.state = mjEntry
-				o.builderState.outFinished = false
 			}
 
-			if o.outputReady || o.builderState.outCount == o.output.Capacity() {
-				if o.builderState.outCount == 0 {
-					// We have already fully emitted the result of the join, so we
-					// transition to "finished" state.
-					o.state = mjDone
-					continue
-				}
+		case mjBuildFromBatch:
+			o.buildFromBatch()
+			if o.builderState.outCount == o.outputCapacity {
 				o.output.SetLength(o.builderState.outCount)
-				// Reset builder out count.
-				o.builderState.outCount = 0
-				o.outputReady = false
 				return o.output
 			}
-		case mjDone:
-			// Note that resetting of buffered group will close disk queues
-			// (if there are any).
-			if o.bufferedGroup.needToReset {
+			if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
+				o.state = mjEntry
+			}
+
+		case mjBuildFromBufferedGroup:
+			bufferedGroupComplete := o.buildFromBufferedGroup()
+			if bufferedGroupComplete {
 				o.bufferedGroup.helper.Reset(o.Ctx)
-				o.bufferedGroup.needToReset = false
+				o.state = mjEntry
+			}
+			if o.builderState.outCount == o.outputCapacity {
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
 			}
+
+		case mjDone:
 			return coldata.ZeroBatch
+
 		default:
 			colexecerror.InternalError(errors.AssertionFailedf("unexpected merge joiner state in Next: %v", o.state))
 		}
diff --git a/pkg/sql/colexec/colexecjoin/mergejoiner_fullouter.eg.go b/pkg/sql/colexec/colexecjoin/mergejoiner_fullouter.eg.go
index a613f1b7086b..0d8d1fde22d9 100644
--- a/pkg/sql/colexec/colexecjoin/mergejoiner_fullouter.eg.go
+++ b/pkg/sql/colexec/colexecjoin/mergejoiner_fullouter.eg.go
@@ -48,13 +48,13 @@ var _ colexecop.Operator = &mergeJoinFullOuterOp{}
 func (o *mergeJoinFullOuterOp) probeBodyLSeltrueRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -77,43 +77,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -211,19 +211,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -304,7 +304,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -314,7 +314,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -324,10 +324,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -349,43 +354,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -459,19 +464,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -536,7 +541,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -546,7 +551,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -556,10 +561,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -581,43 +591,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -691,19 +701,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -768,7 +778,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -778,7 +788,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -788,10 +798,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -812,43 +827,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -955,19 +970,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -1054,7 +1069,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -1064,7 +1079,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -1074,10 +1089,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -1095,43 +1115,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1238,19 +1258,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -1337,7 +1357,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -1347,7 +1367,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -1357,10 +1377,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -1379,43 +1404,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1522,19 +1547,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -1621,7 +1646,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -1631,7 +1656,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -1641,10 +1666,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -1666,43 +1696,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1833,19 +1863,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -1948,7 +1978,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -1958,7 +1988,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -1968,10 +1998,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -1993,43 +2028,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2124,19 +2159,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -2215,7 +2250,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -2225,7 +2260,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -2235,10 +2270,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -2260,43 +2300,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2370,19 +2410,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -2447,7 +2487,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -2457,7 +2497,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -2467,10 +2507,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -2492,43 +2537,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2620,19 +2665,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -2709,7 +2754,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -2719,7 +2764,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -2729,10 +2774,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -2754,43 +2804,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2870,19 +2920,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -2951,7 +3001,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -2961,7 +3011,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -2971,10 +3021,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -2989,13 +3044,13 @@ EqLoop:
 func (o *mergeJoinFullOuterOp) probeBodyLSeltrueRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -3018,43 +3073,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3152,19 +3207,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3245,7 +3300,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -3255,7 +3310,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -3265,10 +3320,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -3290,43 +3350,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3400,19 +3460,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3477,7 +3537,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -3487,7 +3547,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -3497,10 +3557,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -3522,43 +3587,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3632,19 +3697,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3709,7 +3774,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -3719,7 +3784,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -3729,10 +3794,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -3753,43 +3823,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3896,19 +3966,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3995,7 +4065,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -4005,7 +4075,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -4015,10 +4085,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -4036,43 +4111,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4179,19 +4254,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -4278,7 +4353,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -4288,7 +4363,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -4298,10 +4373,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -4320,43 +4400,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4463,19 +4543,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -4562,7 +4642,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -4572,7 +4652,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -4582,10 +4662,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -4607,43 +4692,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4774,19 +4859,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -4889,7 +4974,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -4899,7 +4984,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -4909,10 +4994,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -4934,43 +5024,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5065,19 +5155,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5156,7 +5246,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -5166,7 +5256,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -5176,10 +5266,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -5201,43 +5296,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5311,19 +5406,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5388,7 +5483,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -5398,7 +5493,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -5408,10 +5503,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -5433,43 +5533,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5561,19 +5661,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5650,7 +5750,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -5660,7 +5760,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -5670,10 +5770,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -5695,43 +5800,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5811,19 +5916,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5892,7 +5997,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -5902,7 +6007,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -5912,10 +6017,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -5930,13 +6040,13 @@ EqLoop:
 func (o *mergeJoinFullOuterOp) probeBodyLSelfalseRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -5959,43 +6069,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6093,19 +6203,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -6186,7 +6296,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -6196,7 +6306,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -6206,10 +6316,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -6231,43 +6346,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6341,19 +6456,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -6418,7 +6533,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -6428,7 +6543,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -6438,10 +6553,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -6463,43 +6583,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6573,19 +6693,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -6650,7 +6770,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -6660,7 +6780,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -6670,10 +6790,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -6694,43 +6819,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6837,19 +6962,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -6936,7 +7061,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -6946,7 +7071,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -6956,10 +7081,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -6977,43 +7107,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7120,19 +7250,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -7219,7 +7349,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -7229,7 +7359,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -7239,10 +7369,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -7261,43 +7396,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7404,19 +7539,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -7503,7 +7638,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -7513,7 +7648,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -7523,10 +7658,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -7548,43 +7688,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7715,19 +7855,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -7830,7 +7970,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -7840,7 +7980,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -7850,10 +7990,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -7875,43 +8020,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8006,19 +8151,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -8097,7 +8242,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -8107,7 +8252,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -8117,10 +8262,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -8142,43 +8292,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8252,19 +8402,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -8329,7 +8479,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -8339,7 +8489,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -8349,10 +8499,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -8374,43 +8529,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8502,19 +8657,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -8591,7 +8746,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -8601,7 +8756,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -8611,10 +8766,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -8636,43 +8796,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8752,19 +8912,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -8833,7 +8993,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -8843,7 +9003,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -8853,10 +9013,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -8871,13 +9036,13 @@ EqLoop:
 func (o *mergeJoinFullOuterOp) probeBodyLSelfalseRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -8900,43 +9065,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -9034,19 +9199,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -9127,7 +9292,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -9137,7 +9302,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -9147,10 +9312,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -9172,43 +9342,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -9282,19 +9452,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -9359,7 +9529,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -9369,7 +9539,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -9379,10 +9549,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -9404,43 +9579,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -9514,19 +9689,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -9591,7 +9766,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -9601,7 +9776,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -9611,10 +9786,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -9635,43 +9815,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -9778,19 +9958,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -9877,7 +10057,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -9887,7 +10067,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -9897,10 +10077,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -9918,43 +10103,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -10061,19 +10246,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -10160,7 +10345,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -10170,7 +10355,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -10180,10 +10365,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -10202,43 +10392,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -10345,19 +10535,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -10444,7 +10634,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -10454,7 +10644,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -10464,10 +10654,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -10489,43 +10684,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -10656,19 +10851,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -10771,7 +10966,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -10781,7 +10976,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -10791,10 +10986,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -10816,43 +11016,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -10947,19 +11147,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -11038,7 +11238,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -11048,7 +11248,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -11058,10 +11258,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -11083,43 +11288,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -11193,19 +11398,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -11270,7 +11475,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -11280,7 +11485,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -11290,10 +11495,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -11315,43 +11525,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -11443,19 +11653,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -11532,7 +11742,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -11542,7 +11752,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -11552,10 +11762,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -11577,43 +11792,43 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -11693,19 +11908,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -11774,7 +11989,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -11784,7 +11999,7 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -11794,10 +12009,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -11838,7 +12058,6 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[:len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		LeftColLoop:
 			for colIdx := range input.sourceTypes {
@@ -11883,8 +12102,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -11948,8 +12167,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -12013,8 +12232,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -12077,8 +12296,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -12138,8 +12357,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -12200,8 +12419,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -12265,8 +12484,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -12330,8 +12549,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -12395,8 +12614,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -12460,8 +12679,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -12525,8 +12744,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -12599,8 +12818,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -12663,8 +12882,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -12727,8 +12946,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -12790,8 +13009,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -12850,8 +13069,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -12911,8 +13130,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -12975,8 +13194,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -13039,8 +13258,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -13103,8 +13322,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -13167,8 +13386,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -13231,8 +13450,8 @@ func (o *mergeJoinFullOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -13306,7 +13525,6 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[colOffset:colOffset+len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		RightColLoop:
 			for colIdx := range input.sourceTypes {
@@ -13342,8 +13560,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -13411,8 +13629,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -13480,8 +13698,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -13548,8 +13766,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -13613,8 +13831,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -13679,8 +13897,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -13748,8 +13966,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -13817,8 +14035,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -13886,8 +14104,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -13955,8 +14173,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -14024,8 +14242,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -14103,8 +14321,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -14172,8 +14390,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -14241,8 +14459,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -14309,8 +14527,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -14374,8 +14592,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -14440,8 +14658,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -14509,8 +14727,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -14578,8 +14796,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -14647,8 +14865,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -14716,8 +14934,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -14785,8 +15003,8 @@ func (o *mergeJoinFullOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -14872,20 +15090,6 @@ func (o *mergeJoinFullOuterOp) probe() {
 	}
 }
 
-// setBuilderSourceToBufferedGroup sets up the builder state to use the
-// buffered group.
-func (o *mergeJoinFullOuterOp) setBuilderSourceToBufferedGroup() {
-	o.builderState.buildFrom = mjBuildFromBufferedGroup
-	o.bufferedGroup.helper.setupBuilder()
-	o.builderState.totalOutCountFromBufferedGroup = o.bufferedGroup.helper.calculateOutputCount()
-	o.builderState.alreadyEmittedFromBufferedGroup = 0
-
-	// We cannot yet reset the buffered groups because the builder will be taking
-	// input from them. The actual reset will take place on the next call to
-	// initProberState().
-	o.bufferedGroup.needToReset = true
-}
-
 // exhaustLeftSource sets up the builder to process any remaining tuples from
 // the left source. It should only be called when the right source has been
 // exhausted.
@@ -14938,62 +15142,128 @@ func (o *mergeJoinFullOuterOp) exhaustRightSource() {
 	o.proberState.rIdx = o.proberState.rLength
 }
 
-// calculateOutputCount uses the toBuild field of each group and the output
-// batch size to determine the output count. Note that as soon as a group is
+// numBuiltFromBatch uses the toBuild field of each group and the output
+// capacity to determine the output count. Note that as soon as a group is
 // materialized partially or fully to output, its toBuild field is updated
-// accordingly.
-func (o *mergeJoinFullOuterOp) calculateOutputCount(groups []group) int {
-	count := o.builderState.outCount
-	outputCapacity := o.output.Capacity()
-
+// accordingly. The number of tuples that will be built from batch during the
+// current iteration is returned.
+func (o *mergeJoinFullOuterOp) numBuiltFromBatch(groups []group) (numBuilt int) {
+	outCount := o.builderState.outCount
 	for i := 0; i < len(groups); i++ {
-		count += groups[i].toBuild
+		outCount += groups[i].toBuild
 		groups[i].toBuild = 0
-		if count > o.output.Capacity() {
-			groups[i].toBuild = count - outputCapacity
-			count = outputCapacity
-			return count
+		if outCount > o.outputCapacity {
+			groups[i].toBuild = outCount - o.outputCapacity
+			return o.outputCapacity - o.builderState.outCount
 		}
 	}
-	o.builderState.outFinished = true
-	return count
+	return outCount - o.builderState.outCount
 }
 
-// build creates the cross product, and writes it to the output member.
-func (o *mergeJoinFullOuterOp) build() {
+// buildFromBatch builds as many output rows as possible from the groups that
+// were complete in the probing batches. New rows are put starting at
+// o.builderState.outCount position until either the capacity is reached or all
+// groups are processed.
+func (o *mergeJoinFullOuterOp) buildFromBatch() {
 	outStartIdx := o.builderState.outCount
-	switch o.builderState.buildFrom {
-	case mjBuildFromBatch:
-		o.builderState.outCount = o.calculateOutputCount(o.builderState.lGroups)
-		if o.output.Width() != 0 && o.builderState.outCount > outStartIdx {
-			// We will be actually building the output if we have columns in the output
-			// batch (meaning that we're not doing query like 'SELECT count(*) ...')
-			// and when builderState.outCount has increased (meaning that we have
-			// something to build).
-			colOffsetForRightGroups := 0
-			o.buildLeftGroupsFromBatch(o.builderState.lGroups, &o.left, o.proberState.lBatch, outStartIdx)
-			colOffsetForRightGroups = len(o.left.sourceTypes)
-			_ = colOffsetForRightGroups
-			o.buildRightGroupsFromBatch(o.builderState.rGroups, colOffsetForRightGroups, &o.right, o.proberState.rBatch, outStartIdx)
+	numBuilt := o.numBuiltFromBatch(o.builderState.lGroups)
+	o.builderState.outCount += numBuilt
+	if numBuilt > 0 && len(o.outputTypes) != 0 {
+		// We will be actually building the output if we have columns in the output
+		// batch (meaning that we're not doing query like 'SELECT count(*) ...')
+		// and when builderState.outCount has increased (meaning that we have
+		// something to build).
+		colOffsetForRightGroups := 0
+		o.buildLeftGroupsFromBatch(o.builderState.lGroups, &o.left, o.proberState.lBatch, outStartIdx)
+		colOffsetForRightGroups = len(o.left.sourceTypes)
+		_ = colOffsetForRightGroups
+		o.buildRightGroupsFromBatch(o.builderState.rGroups, colOffsetForRightGroups, &o.right, o.proberState.rBatch, outStartIdx)
+	}
+}
+
+// transitionIntoBuildingFromBufferedGroup should be called once we have
+// non-empty right buffered group in order to setup the buffered group builder.
+// It will complete the right buffered group (meaning it'll read all batches
+// from the right input until either the new group is found or the input is
+// exhausted).
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinFullOuterOp) transitionIntoBuildingFromBufferedGroup() {
+	if o.proberState.rIdx == o.proberState.rLength {
+		// The right buffered group might extend into the next batch, so we have
+		// to complete it first.
+		o.completeRightBufferedGroup()
+	}
+
+	o.bufferedGroup.helper.setupLeftBuilder()
+
+	startIdx := o.bufferedGroup.leftGroupStartIdx
+
+	o.bufferedGroup.helper.prepareForNextLeftBatch(o.proberState.lBatch, startIdx, o.proberState.lIdx)
+	o.state = mjBuildFromBufferedGroup
+}
+
+// buildFromBufferedGroup builds the output based on the current buffered group
+// and puts new tuples starting at position b.builderState.outCount. It returns
+// true once the output for the buffered group has been fully populated.
+// It is assumed that transitionIntoBuildingFromBufferedGroup has been called.
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinFullOuterOp) buildFromBufferedGroup() (bufferedGroupComplete bool) {
+	bg := &o.bufferedGroup
+	// Iterate until either we use up the whole capacity of the output batch or
+	// we complete the buffered group.
+	for {
+		if bg.helper.builderState.left.curSrcStartIdx == o.proberState.lLength {
+			// The output has been fully built from the current left batch.
+			bg.leftBatchDone = true
+		}
+		if bg.leftBatchDone {
+			// The current left batch has been fully processed with regards to
+			// the buffered group.
+			bg.leftBatchDone = false
+			if o.proberState.lIdx < o.proberState.lLength {
+				// The group on the left is finished within the current left
+				// batch.
+				return true
+			}
+			var skipLeftBufferedGroup bool
+			if skipLeftBufferedGroup {
+				// Keep fetching the next batch from the left input until we
+				// either find the start of the new group or we exhaust the
+				// input.
+				for o.proberState.lIdx == o.proberState.lLength && o.proberState.lLength > 0 {
+					o.continueLeftBufferedGroup()
+				}
+				return true
+			}
+			// Fetch the next batch from the left input and calculate the
+			// boundaries of the buffered group.
+			o.continueLeftBufferedGroup()
+			if o.proberState.lIdx == 0 {
+				return true
+			}
+			bg.helper.prepareForNextLeftBatch(
+				o.proberState.lBatch, bg.leftGroupStartIdx, o.proberState.lIdx,
+			)
 		}
 
-	case mjBuildFromBufferedGroup:
-		willEmit := o.builderState.totalOutCountFromBufferedGroup - o.builderState.alreadyEmittedFromBufferedGroup
-		if o.builderState.outCount+willEmit > o.output.Capacity() {
-			willEmit = o.output.Capacity() - o.builderState.outCount
+		willEmit := bg.helper.canEmit()
+		if o.builderState.outCount+willEmit > o.outputCapacity {
+			willEmit = o.outputCapacity - o.builderState.outCount
 		} else {
-			o.builderState.outFinished = true
+			bg.leftBatchDone = true
+		}
+		if willEmit > 0 && len(o.outputTypes) != 0 {
+			bg.helper.buildFromLeftInput(o.Ctx, o.builderState.outCount)
+			bg.helper.buildFromRightInput(o.Ctx, o.builderState.outCount)
 		}
 		o.builderState.outCount += willEmit
-		o.builderState.alreadyEmittedFromBufferedGroup += willEmit
-		if o.output.Width() != 0 && willEmit > 0 {
-			o.bufferedGroup.helper.buildFromLeftInput(o.Ctx, outStartIdx)
-			o.bufferedGroup.helper.buildFromRightInput(o.Ctx, outStartIdx)
+		bg.helper.builderState.numEmittedCurLeftBatch += willEmit
+		bg.helper.builderState.numEmittedTotal += willEmit
+		if o.builderState.outCount == o.outputCapacity {
+			return false
 		}
-
-	default:
-		colexecerror.InternalError(errors.AssertionFailedf("unsupported mjBuildFrom %d", o.builderState.buildFrom))
-
 	}
 }
 
@@ -15001,30 +15271,29 @@ func (o *mergeJoinFullOuterOp) Next() coldata.Batch {
 	o.output, _ = o.unlimitedAllocator.ResetMaybeReallocate(
 		o.outputTypes, o.output, 1 /* minDesiredCapacity */, o.memoryLimit,
 	)
+	o.outputCapacity = o.output.Capacity()
 	o.bufferedGroup.helper.output = o.output
+	o.builderState.outCount = 0
 	for {
 		switch o.state {
 		case mjEntry:
-			o.initProberState()
-
-			if o.nonEmptyBufferedGroup() {
-				o.state = mjFinishBufferedGroup
-				break
+			// If this is the first batch or we're done with the current batch,
+			// get the next batch.
+			if o.proberState.lBatch == nil || (o.proberState.lLength != 0 && o.proberState.lIdx == o.proberState.lLength) {
+				o.proberState.lIdx, o.proberState.lBatch = 0, o.left.source.Next()
+				o.proberState.lLength = o.proberState.lBatch.Length()
+			}
+			if o.proberState.rBatch == nil || (o.proberState.rLength != 0 && o.proberState.rIdx == o.proberState.rLength) {
+				o.proberState.rIdx, o.proberState.rBatch = 0, o.right.source.Next()
+				o.proberState.rLength = o.proberState.rBatch.Length()
 			}
-
 			if o.sourceFinished() {
 				o.state = mjSourceFinished
 				break
 			}
-
 			o.state = mjProbe
+
 		case mjSourceFinished:
-			o.outputReady = true
-			o.builderState.buildFrom = mjBuildFromBatch
-			// Next, we need to make sure that builder state is set up for a case when
-			// neither exhaustLeftSource nor exhaustRightSource is called below. In such
-			// scenario the merge joiner is done, so it'll be outputting zero-length
-			// batches from now on.
 			o.builderState.lGroups = o.builderState.lGroups[:0]
 			o.builderState.rGroups = o.builderState.rGroups[:0]
 			// At least one of the sources is finished. If it was the right one,
@@ -15033,10 +15302,6 @@ func (o *mergeJoinFullOuterOp) Next() coldata.Batch {
 			// finished, then there is nothing left to do.
 			if o.proberState.lIdx < o.proberState.lLength {
 				o.exhaustLeftSource()
-				// We unset o.outputReady here because we want to put as many unmatched
-				// tuples from the left into the output batch. Once outCount reaches the
-				// desired output batch size, the output will be returned.
-				o.outputReady = false
 			}
 			// At least one of the sources is finished. If it was the left one,
 			// then we need to emit remaining tuples from the right source with
@@ -15044,49 +15309,51 @@ func (o *mergeJoinFullOuterOp) Next() coldata.Batch {
 			// finished, then there is nothing left to do.
 			if o.proberState.rIdx < o.proberState.rLength {
 				o.exhaustRightSource()
-				// We unset o.outputReady here because we want to put as many unmatched
-				// tuples from the right into the output batch. Once outCount reaches the
-				// desired output batch size, the output will be returned.
-				o.outputReady = false
 			}
-			o.state = mjBuild
-		case mjFinishBufferedGroup:
-			o.finishProbe()
-			o.setBuilderSourceToBufferedGroup()
-			o.state = mjBuild
+			if len(o.builderState.lGroups) == 0 && len(o.builderState.rGroups) == 0 {
+				o.state = mjDone
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
+			}
+			o.state = mjBuildFromBatch
+
 		case mjProbe:
 			o.probe()
-			o.setBuilderSourceToBatch()
-			o.state = mjBuild
-		case mjBuild:
-			o.build()
-
-			if o.builderState.outFinished {
+			o.builderState.lGroups, o.builderState.rGroups = o.groups.getGroups()
+			if len(o.builderState.lGroups) > 0 || len(o.builderState.rGroups) > 0 {
+				o.state = mjBuildFromBatch
+			} else if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
 				o.state = mjEntry
-				o.builderState.outFinished = false
 			}
 
-			if o.outputReady || o.builderState.outCount == o.output.Capacity() {
-				if o.builderState.outCount == 0 {
-					// We have already fully emitted the result of the join, so we
-					// transition to "finished" state.
-					o.state = mjDone
-					continue
-				}
+		case mjBuildFromBatch:
+			o.buildFromBatch()
+			if o.builderState.outCount == o.outputCapacity {
 				o.output.SetLength(o.builderState.outCount)
-				// Reset builder out count.
-				o.builderState.outCount = 0
-				o.outputReady = false
 				return o.output
 			}
-		case mjDone:
-			// Note that resetting of buffered group will close disk queues
-			// (if there are any).
-			if o.bufferedGroup.needToReset {
+			if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
+				o.state = mjEntry
+			}
+
+		case mjBuildFromBufferedGroup:
+			bufferedGroupComplete := o.buildFromBufferedGroup()
+			if bufferedGroupComplete {
 				o.bufferedGroup.helper.Reset(o.Ctx)
-				o.bufferedGroup.needToReset = false
+				o.state = mjEntry
+			}
+			if o.builderState.outCount == o.outputCapacity {
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
 			}
+
+		case mjDone:
 			return coldata.ZeroBatch
+
 		default:
 			colexecerror.InternalError(errors.AssertionFailedf("unexpected merge joiner state in Next: %v", o.state))
 		}
diff --git a/pkg/sql/colexec/colexecjoin/mergejoiner_inner.eg.go b/pkg/sql/colexec/colexecjoin/mergejoiner_inner.eg.go
index c9d8cf562b80..fdd9d665d452 100644
--- a/pkg/sql/colexec/colexecjoin/mergejoiner_inner.eg.go
+++ b/pkg/sql/colexec/colexecjoin/mergejoiner_inner.eg.go
@@ -48,13 +48,13 @@ var _ colexecop.Operator = &mergeJoinInnerOp{}
 func (o *mergeJoinInnerOp) probeBodyLSeltrueRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -77,21 +77,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -189,19 +191,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -221,10 +223,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -246,21 +253,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -334,19 +343,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -366,10 +375,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -391,21 +405,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -479,19 +495,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -511,10 +527,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -535,21 +556,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -656,19 +679,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -688,10 +711,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -709,21 +737,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -830,19 +860,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -862,10 +892,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -884,21 +919,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1005,19 +1042,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -1037,10 +1074,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -1062,21 +1104,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1207,19 +1251,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -1239,10 +1283,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -1264,21 +1313,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1373,19 +1424,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -1405,10 +1456,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -1430,21 +1486,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1518,19 +1576,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -1550,10 +1608,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -1575,21 +1638,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1681,19 +1746,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -1713,10 +1778,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -1738,21 +1808,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1832,19 +1904,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -1864,10 +1936,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -1882,13 +1959,13 @@ EqLoop:
 func (o *mergeJoinInnerOp) probeBodyLSeltrueRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -1911,21 +1988,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2023,19 +2102,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -2055,10 +2134,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -2080,21 +2164,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2168,19 +2254,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -2200,10 +2286,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -2225,21 +2316,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2313,19 +2406,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -2345,10 +2438,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -2369,21 +2467,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2490,19 +2590,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -2522,10 +2622,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -2543,21 +2648,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2664,19 +2771,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -2696,10 +2803,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -2718,21 +2830,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2839,19 +2953,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -2871,10 +2985,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -2896,21 +3015,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3041,19 +3162,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3073,10 +3194,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -3098,21 +3224,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3207,19 +3335,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3239,10 +3367,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -3264,21 +3397,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3352,19 +3487,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3384,10 +3519,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -3409,21 +3549,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3515,19 +3657,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3547,10 +3689,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -3572,21 +3719,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3666,19 +3815,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3698,10 +3847,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -3716,13 +3870,13 @@ EqLoop:
 func (o *mergeJoinInnerOp) probeBodyLSelfalseRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -3745,21 +3899,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3857,19 +4013,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3889,10 +4045,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -3914,21 +4075,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4002,19 +4165,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -4034,10 +4197,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -4059,21 +4227,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4147,19 +4317,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -4179,10 +4349,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -4203,21 +4378,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4324,19 +4501,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -4356,10 +4533,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -4377,21 +4559,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4498,19 +4682,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -4530,10 +4714,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -4552,21 +4741,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4673,19 +4864,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -4705,10 +4896,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -4730,21 +4926,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4875,19 +5073,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -4907,10 +5105,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -4932,21 +5135,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5041,19 +5246,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5073,10 +5278,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -5098,21 +5308,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5186,19 +5398,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5218,10 +5430,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -5243,21 +5460,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5349,19 +5568,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5381,10 +5600,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -5406,21 +5630,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5500,19 +5726,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5532,10 +5758,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -5550,13 +5781,13 @@ EqLoop:
 func (o *mergeJoinInnerOp) probeBodyLSelfalseRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -5579,21 +5810,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5691,19 +5924,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5723,10 +5956,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -5748,21 +5986,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5836,19 +6076,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5868,10 +6108,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -5893,21 +6138,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5981,19 +6228,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -6013,10 +6260,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -6037,21 +6289,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6158,19 +6412,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -6190,10 +6444,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -6211,21 +6470,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6332,19 +6593,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -6364,10 +6625,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -6386,21 +6652,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6507,19 +6775,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -6539,10 +6807,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -6564,21 +6837,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6709,19 +6984,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -6741,10 +7016,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -6766,21 +7046,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6875,19 +7157,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -6907,10 +7189,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -6932,21 +7219,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7020,19 +7309,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -7052,10 +7341,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -7077,21 +7371,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7183,19 +7479,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -7215,10 +7511,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -7240,21 +7541,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7334,19 +7637,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -7366,10 +7669,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -7410,7 +7718,6 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[:len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		LeftColLoop:
 			for colIdx := range input.sourceTypes {
@@ -7455,8 +7762,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7517,8 +7824,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7579,8 +7886,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7640,8 +7947,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7698,8 +8005,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7757,8 +8064,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7819,8 +8126,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7881,8 +8188,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7943,8 +8250,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8005,8 +8312,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8067,8 +8374,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8138,8 +8445,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8199,8 +8506,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8260,8 +8567,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8320,8 +8627,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8377,8 +8684,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8435,8 +8742,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8496,8 +8803,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8557,8 +8864,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8618,8 +8925,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8679,8 +8986,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8740,8 +9047,8 @@ func (o *mergeJoinInnerOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8812,7 +9119,6 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[colOffset:colOffset+len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		RightColLoop:
 			for colIdx := range input.sourceTypes {
@@ -8848,8 +9154,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8915,8 +9221,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8982,8 +9288,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9048,8 +9354,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9111,8 +9417,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9175,8 +9481,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9242,8 +9548,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9309,8 +9615,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9376,8 +9682,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9443,8 +9749,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9510,8 +9816,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9587,8 +9893,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9654,8 +9960,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9721,8 +10027,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9787,8 +10093,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9850,8 +10156,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9914,8 +10220,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9981,8 +10287,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10048,8 +10354,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10115,8 +10421,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10182,8 +10488,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10249,8 +10555,8 @@ func (o *mergeJoinInnerOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10334,20 +10640,6 @@ func (o *mergeJoinInnerOp) probe() {
 	}
 }
 
-// setBuilderSourceToBufferedGroup sets up the builder state to use the
-// buffered group.
-func (o *mergeJoinInnerOp) setBuilderSourceToBufferedGroup() {
-	o.builderState.buildFrom = mjBuildFromBufferedGroup
-	o.bufferedGroup.helper.setupBuilder()
-	o.builderState.totalOutCountFromBufferedGroup = o.bufferedGroup.helper.calculateOutputCount()
-	o.builderState.alreadyEmittedFromBufferedGroup = 0
-
-	// We cannot yet reset the buffered groups because the builder will be taking
-	// input from them. The actual reset will take place on the next call to
-	// initProberState().
-	o.bufferedGroup.needToReset = true
-}
-
 // exhaustLeftSource sets up the builder to process any remaining tuples from
 // the left source. It should only be called when the right source has been
 // exhausted.
@@ -10362,62 +10654,128 @@ func (o *mergeJoinInnerOp) exhaustRightSource() {
 	// ignored in all joins except for RIGHT OUTER and FULL OUTER.
 }
 
-// calculateOutputCount uses the toBuild field of each group and the output
-// batch size to determine the output count. Note that as soon as a group is
+// numBuiltFromBatch uses the toBuild field of each group and the output
+// capacity to determine the output count. Note that as soon as a group is
 // materialized partially or fully to output, its toBuild field is updated
-// accordingly.
-func (o *mergeJoinInnerOp) calculateOutputCount(groups []group) int {
-	count := o.builderState.outCount
-	outputCapacity := o.output.Capacity()
-
+// accordingly. The number of tuples that will be built from batch during the
+// current iteration is returned.
+func (o *mergeJoinInnerOp) numBuiltFromBatch(groups []group) (numBuilt int) {
+	outCount := o.builderState.outCount
 	for i := 0; i < len(groups); i++ {
-		count += groups[i].toBuild
+		outCount += groups[i].toBuild
 		groups[i].toBuild = 0
-		if count > o.output.Capacity() {
-			groups[i].toBuild = count - outputCapacity
-			count = outputCapacity
-			return count
+		if outCount > o.outputCapacity {
+			groups[i].toBuild = outCount - o.outputCapacity
+			return o.outputCapacity - o.builderState.outCount
 		}
 	}
-	o.builderState.outFinished = true
-	return count
+	return outCount - o.builderState.outCount
 }
 
-// build creates the cross product, and writes it to the output member.
-func (o *mergeJoinInnerOp) build() {
+// buildFromBatch builds as many output rows as possible from the groups that
+// were complete in the probing batches. New rows are put starting at
+// o.builderState.outCount position until either the capacity is reached or all
+// groups are processed.
+func (o *mergeJoinInnerOp) buildFromBatch() {
 	outStartIdx := o.builderState.outCount
-	switch o.builderState.buildFrom {
-	case mjBuildFromBatch:
-		o.builderState.outCount = o.calculateOutputCount(o.builderState.lGroups)
-		if o.output.Width() != 0 && o.builderState.outCount > outStartIdx {
-			// We will be actually building the output if we have columns in the output
-			// batch (meaning that we're not doing query like 'SELECT count(*) ...')
-			// and when builderState.outCount has increased (meaning that we have
-			// something to build).
-			colOffsetForRightGroups := 0
-			o.buildLeftGroupsFromBatch(o.builderState.lGroups, &o.left, o.proberState.lBatch, outStartIdx)
-			colOffsetForRightGroups = len(o.left.sourceTypes)
-			_ = colOffsetForRightGroups
-			o.buildRightGroupsFromBatch(o.builderState.rGroups, colOffsetForRightGroups, &o.right, o.proberState.rBatch, outStartIdx)
+	numBuilt := o.numBuiltFromBatch(o.builderState.lGroups)
+	o.builderState.outCount += numBuilt
+	if numBuilt > 0 && len(o.outputTypes) != 0 {
+		// We will be actually building the output if we have columns in the output
+		// batch (meaning that we're not doing query like 'SELECT count(*) ...')
+		// and when builderState.outCount has increased (meaning that we have
+		// something to build).
+		colOffsetForRightGroups := 0
+		o.buildLeftGroupsFromBatch(o.builderState.lGroups, &o.left, o.proberState.lBatch, outStartIdx)
+		colOffsetForRightGroups = len(o.left.sourceTypes)
+		_ = colOffsetForRightGroups
+		o.buildRightGroupsFromBatch(o.builderState.rGroups, colOffsetForRightGroups, &o.right, o.proberState.rBatch, outStartIdx)
+	}
+}
+
+// transitionIntoBuildingFromBufferedGroup should be called once we have
+// non-empty right buffered group in order to setup the buffered group builder.
+// It will complete the right buffered group (meaning it'll read all batches
+// from the right input until either the new group is found or the input is
+// exhausted).
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinInnerOp) transitionIntoBuildingFromBufferedGroup() {
+	if o.proberState.rIdx == o.proberState.rLength {
+		// The right buffered group might extend into the next batch, so we have
+		// to complete it first.
+		o.completeRightBufferedGroup()
+	}
+
+	o.bufferedGroup.helper.setupLeftBuilder()
+
+	startIdx := o.bufferedGroup.leftGroupStartIdx
+
+	o.bufferedGroup.helper.prepareForNextLeftBatch(o.proberState.lBatch, startIdx, o.proberState.lIdx)
+	o.state = mjBuildFromBufferedGroup
+}
+
+// buildFromBufferedGroup builds the output based on the current buffered group
+// and puts new tuples starting at position b.builderState.outCount. It returns
+// true once the output for the buffered group has been fully populated.
+// It is assumed that transitionIntoBuildingFromBufferedGroup has been called.
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinInnerOp) buildFromBufferedGroup() (bufferedGroupComplete bool) {
+	bg := &o.bufferedGroup
+	// Iterate until either we use up the whole capacity of the output batch or
+	// we complete the buffered group.
+	for {
+		if bg.helper.builderState.left.curSrcStartIdx == o.proberState.lLength {
+			// The output has been fully built from the current left batch.
+			bg.leftBatchDone = true
+		}
+		if bg.leftBatchDone {
+			// The current left batch has been fully processed with regards to
+			// the buffered group.
+			bg.leftBatchDone = false
+			if o.proberState.lIdx < o.proberState.lLength {
+				// The group on the left is finished within the current left
+				// batch.
+				return true
+			}
+			var skipLeftBufferedGroup bool
+			if skipLeftBufferedGroup {
+				// Keep fetching the next batch from the left input until we
+				// either find the start of the new group or we exhaust the
+				// input.
+				for o.proberState.lIdx == o.proberState.lLength && o.proberState.lLength > 0 {
+					o.continueLeftBufferedGroup()
+				}
+				return true
+			}
+			// Fetch the next batch from the left input and calculate the
+			// boundaries of the buffered group.
+			o.continueLeftBufferedGroup()
+			if o.proberState.lIdx == 0 {
+				return true
+			}
+			bg.helper.prepareForNextLeftBatch(
+				o.proberState.lBatch, bg.leftGroupStartIdx, o.proberState.lIdx,
+			)
 		}
 
-	case mjBuildFromBufferedGroup:
-		willEmit := o.builderState.totalOutCountFromBufferedGroup - o.builderState.alreadyEmittedFromBufferedGroup
-		if o.builderState.outCount+willEmit > o.output.Capacity() {
-			willEmit = o.output.Capacity() - o.builderState.outCount
+		willEmit := bg.helper.canEmit()
+		if o.builderState.outCount+willEmit > o.outputCapacity {
+			willEmit = o.outputCapacity - o.builderState.outCount
 		} else {
-			o.builderState.outFinished = true
+			bg.leftBatchDone = true
+		}
+		if willEmit > 0 && len(o.outputTypes) != 0 {
+			bg.helper.buildFromLeftInput(o.Ctx, o.builderState.outCount)
+			bg.helper.buildFromRightInput(o.Ctx, o.builderState.outCount)
 		}
 		o.builderState.outCount += willEmit
-		o.builderState.alreadyEmittedFromBufferedGroup += willEmit
-		if o.output.Width() != 0 && willEmit > 0 {
-			o.bufferedGroup.helper.buildFromLeftInput(o.Ctx, outStartIdx)
-			o.bufferedGroup.helper.buildFromRightInput(o.Ctx, outStartIdx)
+		bg.helper.builderState.numEmittedCurLeftBatch += willEmit
+		bg.helper.builderState.numEmittedTotal += willEmit
+		if o.builderState.outCount == o.outputCapacity {
+			return false
 		}
-
-	default:
-		colexecerror.InternalError(errors.AssertionFailedf("unsupported mjBuildFrom %d", o.builderState.buildFrom))
-
 	}
 }
 
@@ -10425,65 +10783,75 @@ func (o *mergeJoinInnerOp) Next() coldata.Batch {
 	o.output, _ = o.unlimitedAllocator.ResetMaybeReallocate(
 		o.outputTypes, o.output, 1 /* minDesiredCapacity */, o.memoryLimit,
 	)
+	o.outputCapacity = o.output.Capacity()
 	o.bufferedGroup.helper.output = o.output
+	o.builderState.outCount = 0
 	for {
 		switch o.state {
 		case mjEntry:
-			o.initProberState()
-
-			if o.nonEmptyBufferedGroup() {
-				o.state = mjFinishBufferedGroup
-				break
+			// If this is the first batch or we're done with the current batch,
+			// get the next batch.
+			if o.proberState.lBatch == nil || (o.proberState.lLength != 0 && o.proberState.lIdx == o.proberState.lLength) {
+				o.proberState.lIdx, o.proberState.lBatch = 0, o.left.source.Next()
+				o.proberState.lLength = o.proberState.lBatch.Length()
+			}
+			if o.proberState.rBatch == nil || (o.proberState.rLength != 0 && o.proberState.rIdx == o.proberState.rLength) {
+				o.proberState.rIdx, o.proberState.rBatch = 0, o.right.source.Next()
+				o.proberState.rLength = o.proberState.rBatch.Length()
 			}
-
 			if o.sourceFinished() {
 				o.state = mjSourceFinished
 				break
 			}
-
 			o.state = mjProbe
+
 		case mjSourceFinished:
-			o.outputReady = true
-			o.builderState.buildFrom = mjBuildFromBatch
-			o.setBuilderSourceToBufferedGroup()
-			o.state = mjBuild
-		case mjFinishBufferedGroup:
-			o.finishProbe()
-			o.setBuilderSourceToBufferedGroup()
-			o.state = mjBuild
+			o.builderState.lGroups = o.builderState.lGroups[:0]
+			o.builderState.rGroups = o.builderState.rGroups[:0]
+			if len(o.builderState.lGroups) == 0 && len(o.builderState.rGroups) == 0 {
+				o.state = mjDone
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
+			}
+			o.state = mjBuildFromBatch
+
 		case mjProbe:
 			o.probe()
-			o.setBuilderSourceToBatch()
-			o.state = mjBuild
-		case mjBuild:
-			o.build()
-
-			if o.builderState.outFinished {
+			o.builderState.lGroups, o.builderState.rGroups = o.groups.getGroups()
+			if len(o.builderState.lGroups) > 0 || len(o.builderState.rGroups) > 0 {
+				o.state = mjBuildFromBatch
+			} else if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
 				o.state = mjEntry
-				o.builderState.outFinished = false
 			}
 
-			if o.outputReady || o.builderState.outCount == o.output.Capacity() {
-				if o.builderState.outCount == 0 {
-					// We have already fully emitted the result of the join, so we
-					// transition to "finished" state.
-					o.state = mjDone
-					continue
-				}
+		case mjBuildFromBatch:
+			o.buildFromBatch()
+			if o.builderState.outCount == o.outputCapacity {
 				o.output.SetLength(o.builderState.outCount)
-				// Reset builder out count.
-				o.builderState.outCount = 0
-				o.outputReady = false
 				return o.output
 			}
-		case mjDone:
-			// Note that resetting of buffered group will close disk queues
-			// (if there are any).
-			if o.bufferedGroup.needToReset {
+			if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
+				o.state = mjEntry
+			}
+
+		case mjBuildFromBufferedGroup:
+			bufferedGroupComplete := o.buildFromBufferedGroup()
+			if bufferedGroupComplete {
 				o.bufferedGroup.helper.Reset(o.Ctx)
-				o.bufferedGroup.needToReset = false
+				o.state = mjEntry
 			}
+			if o.builderState.outCount == o.outputCapacity {
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
+			}
+
+		case mjDone:
 			return coldata.ZeroBatch
+
 		default:
 			colexecerror.InternalError(errors.AssertionFailedf("unexpected merge joiner state in Next: %v", o.state))
 		}
diff --git a/pkg/sql/colexec/colexecjoin/mergejoiner_intersectall.eg.go b/pkg/sql/colexec/colexecjoin/mergejoiner_intersectall.eg.go
index 11c0e2bfde2d..55c13e7a4139 100644
--- a/pkg/sql/colexec/colexecjoin/mergejoiner_intersectall.eg.go
+++ b/pkg/sql/colexec/colexecjoin/mergejoiner_intersectall.eg.go
@@ -48,13 +48,13 @@ var _ colexecop.Operator = &mergeJoinIntersectAllOp{}
 func (o *mergeJoinIntersectAllOp) probeBodyLSeltrueRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -77,9 +77,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -204,19 +203,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -240,10 +239,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -265,9 +269,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -368,19 +371,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -404,10 +407,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -429,9 +437,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -532,19 +539,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -568,10 +575,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -592,9 +604,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -728,19 +739,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -764,10 +775,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -785,9 +801,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -921,19 +936,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -957,10 +972,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -979,9 +999,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -1115,19 +1134,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -1151,10 +1170,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -1176,9 +1200,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -1336,19 +1359,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -1372,10 +1395,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -1397,9 +1425,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -1521,19 +1548,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -1557,10 +1584,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -1582,9 +1614,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -1685,19 +1716,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -1721,10 +1752,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -1746,9 +1782,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -1867,19 +1902,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -1903,10 +1938,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -1928,9 +1968,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -2037,19 +2076,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -2073,10 +2112,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -2091,13 +2135,13 @@ EqLoop:
 func (o *mergeJoinIntersectAllOp) probeBodyLSeltrueRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -2120,9 +2164,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -2247,19 +2290,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -2283,10 +2326,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -2308,9 +2356,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -2411,19 +2458,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -2447,10 +2494,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -2472,9 +2524,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -2575,19 +2626,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -2611,10 +2662,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -2635,9 +2691,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -2771,19 +2826,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -2807,10 +2862,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -2828,9 +2888,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -2964,19 +3023,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -3000,10 +3059,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -3022,9 +3086,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -3158,19 +3221,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -3194,10 +3257,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -3219,9 +3287,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -3379,19 +3446,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -3415,10 +3482,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -3440,9 +3512,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -3564,19 +3635,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -3600,10 +3671,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -3625,9 +3701,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -3728,19 +3803,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -3764,10 +3839,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -3789,9 +3869,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -3910,19 +3989,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -3946,10 +4025,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -3971,9 +4055,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
@@ -4080,19 +4163,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -4116,10 +4199,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -4134,13 +4222,13 @@ EqLoop:
 func (o *mergeJoinIntersectAllOp) probeBodyLSelfalseRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -4163,9 +4251,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -4290,19 +4377,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -4326,10 +4413,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -4351,9 +4443,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -4454,19 +4545,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -4490,10 +4581,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -4515,9 +4611,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -4618,19 +4713,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -4654,10 +4749,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -4678,9 +4778,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -4814,19 +4913,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -4850,10 +4949,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -4871,9 +4975,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -5007,19 +5110,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -5043,10 +5146,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -5065,9 +5173,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -5201,19 +5308,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -5237,10 +5344,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -5262,9 +5374,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -5422,19 +5533,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -5458,10 +5569,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -5483,9 +5599,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -5607,19 +5722,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -5643,10 +5758,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -5668,9 +5788,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -5771,19 +5890,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -5807,10 +5926,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -5832,9 +5956,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -5953,19 +6076,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -5989,10 +6112,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -6014,9 +6142,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
@@ -6123,19 +6250,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -6159,10 +6286,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -6177,13 +6309,13 @@ EqLoop:
 func (o *mergeJoinIntersectAllOp) probeBodyLSelfalseRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -6206,9 +6338,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -6333,19 +6464,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -6369,10 +6500,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -6394,9 +6530,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -6497,19 +6632,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -6533,10 +6668,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -6558,9 +6698,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -6661,19 +6800,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -6697,10 +6836,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -6721,9 +6865,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -6857,19 +7000,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -6893,10 +7036,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -6914,9 +7062,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -7050,19 +7197,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -7086,10 +7233,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -7108,9 +7260,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -7244,19 +7395,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -7280,10 +7431,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -7305,9 +7461,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -7465,19 +7620,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -7501,10 +7656,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -7526,9 +7686,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -7650,19 +7809,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -7686,10 +7845,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -7711,9 +7875,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -7814,19 +7977,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -7850,10 +8013,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -7875,9 +8043,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -7996,19 +8163,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -8032,10 +8199,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -8057,9 +8229,8 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
@@ -8166,19 +8337,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -8202,10 +8373,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -8246,7 +8422,6 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[:len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		LeftColLoop:
 			for colIdx := range input.sourceTypes {
@@ -8291,8 +8466,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8353,8 +8528,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8415,8 +8590,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8476,8 +8651,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8534,8 +8709,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8593,8 +8768,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8655,8 +8830,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8717,8 +8892,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8779,8 +8954,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8841,8 +9016,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8903,8 +9078,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8974,8 +9149,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9035,8 +9210,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9096,8 +9271,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9156,8 +9331,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9213,8 +9388,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9271,8 +9446,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9332,8 +9507,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9393,8 +9568,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9454,8 +9629,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9515,8 +9690,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9576,8 +9751,8 @@ func (o *mergeJoinIntersectAllOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9648,7 +9823,6 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[colOffset:colOffset+len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		RightColLoop:
 			for colIdx := range input.sourceTypes {
@@ -9684,8 +9858,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9751,8 +9925,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9818,8 +9992,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9884,8 +10058,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9947,8 +10121,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10011,8 +10185,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10078,8 +10252,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10145,8 +10319,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10212,8 +10386,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10279,8 +10453,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10346,8 +10520,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10423,8 +10597,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10490,8 +10664,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10557,8 +10731,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10623,8 +10797,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10686,8 +10860,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10750,8 +10924,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10817,8 +10991,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10884,8 +11058,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10951,8 +11125,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11018,8 +11192,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11085,8 +11259,8 @@ func (o *mergeJoinIntersectAllOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11170,20 +11344,6 @@ func (o *mergeJoinIntersectAllOp) probe() {
 	}
 }
 
-// setBuilderSourceToBufferedGroup sets up the builder state to use the
-// buffered group.
-func (o *mergeJoinIntersectAllOp) setBuilderSourceToBufferedGroup() {
-	o.builderState.buildFrom = mjBuildFromBufferedGroup
-	o.bufferedGroup.helper.setupBuilder()
-	o.builderState.totalOutCountFromBufferedGroup = o.bufferedGroup.helper.calculateOutputCount()
-	o.builderState.alreadyEmittedFromBufferedGroup = 0
-
-	// We cannot yet reset the buffered groups because the builder will be taking
-	// input from them. The actual reset will take place on the next call to
-	// initProberState().
-	o.bufferedGroup.needToReset = true
-}
-
 // exhaustLeftSource sets up the builder to process any remaining tuples from
 // the left source. It should only be called when the right source has been
 // exhausted.
@@ -11198,60 +11358,134 @@ func (o *mergeJoinIntersectAllOp) exhaustRightSource() {
 	// ignored in all joins except for RIGHT OUTER and FULL OUTER.
 }
 
-// calculateOutputCount uses the toBuild field of each group and the output
-// batch size to determine the output count. Note that as soon as a group is
+// numBuiltFromBatch uses the toBuild field of each group and the output
+// capacity to determine the output count. Note that as soon as a group is
 // materialized partially or fully to output, its toBuild field is updated
-// accordingly.
-func (o *mergeJoinIntersectAllOp) calculateOutputCount(groups []group) int {
-	count := o.builderState.outCount
-	outputCapacity := o.output.Capacity()
-
+// accordingly. The number of tuples that will be built from batch during the
+// current iteration is returned.
+func (o *mergeJoinIntersectAllOp) numBuiltFromBatch(groups []group) (numBuilt int) {
+	outCount := o.builderState.outCount
 	for i := 0; i < len(groups); i++ {
-		count += groups[i].toBuild
+		outCount += groups[i].toBuild
 		groups[i].toBuild = 0
-		if count > o.output.Capacity() {
-			groups[i].toBuild = count - outputCapacity
-			count = outputCapacity
-			return count
+		if outCount > o.outputCapacity {
+			groups[i].toBuild = outCount - o.outputCapacity
+			return o.outputCapacity - o.builderState.outCount
 		}
 	}
-	o.builderState.outFinished = true
-	return count
+	return outCount - o.builderState.outCount
 }
 
-// build creates the cross product, and writes it to the output member.
-func (o *mergeJoinIntersectAllOp) build() {
+// buildFromBatch builds as many output rows as possible from the groups that
+// were complete in the probing batches. New rows are put starting at
+// o.builderState.outCount position until either the capacity is reached or all
+// groups are processed.
+func (o *mergeJoinIntersectAllOp) buildFromBatch() {
 	outStartIdx := o.builderState.outCount
-	switch o.builderState.buildFrom {
-	case mjBuildFromBatch:
-		o.builderState.outCount = o.calculateOutputCount(o.builderState.lGroups)
-		if o.output.Width() != 0 && o.builderState.outCount > outStartIdx {
-			// We will be actually building the output if we have columns in the output
-			// batch (meaning that we're not doing query like 'SELECT count(*) ...')
-			// and when builderState.outCount has increased (meaning that we have
-			// something to build).
-			colOffsetForRightGroups := 0
-			o.buildLeftGroupsFromBatch(o.builderState.lGroups, &o.left, o.proberState.lBatch, outStartIdx)
-			colOffsetForRightGroups = len(o.left.sourceTypes)
-			_ = colOffsetForRightGroups
+	numBuilt := o.numBuiltFromBatch(o.builderState.lGroups)
+	o.builderState.outCount += numBuilt
+	if numBuilt > 0 && len(o.outputTypes) != 0 {
+		// We will be actually building the output if we have columns in the output
+		// batch (meaning that we're not doing query like 'SELECT count(*) ...')
+		// and when builderState.outCount has increased (meaning that we have
+		// something to build).
+		colOffsetForRightGroups := 0
+		o.buildLeftGroupsFromBatch(o.builderState.lGroups, &o.left, o.proberState.lBatch, outStartIdx)
+		colOffsetForRightGroups = len(o.left.sourceTypes)
+		_ = colOffsetForRightGroups
+	}
+}
+
+// transitionIntoBuildingFromBufferedGroup should be called once we have
+// non-empty right buffered group in order to setup the buffered group builder.
+// It will complete the right buffered group (meaning it'll read all batches
+// from the right input until either the new group is found or the input is
+// exhausted).
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinIntersectAllOp) transitionIntoBuildingFromBufferedGroup() {
+	if o.proberState.rIdx == o.proberState.rLength {
+		// The right buffered group might extend into the next batch, so we have
+		// to complete it first.
+		o.completeRightBufferedGroup()
+	}
+
+	o.bufferedGroup.helper.setupLeftBuilder()
+
+	startIdx := o.bufferedGroup.leftGroupStartIdx
+
+	o.bufferedGroup.helper.prepareForNextLeftBatch(o.proberState.lBatch, startIdx, o.proberState.lIdx)
+	o.state = mjBuildFromBufferedGroup
+}
+
+// buildFromBufferedGroup builds the output based on the current buffered group
+// and puts new tuples starting at position b.builderState.outCount. It returns
+// true once the output for the buffered group has been fully populated.
+// It is assumed that transitionIntoBuildingFromBufferedGroup has been called.
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinIntersectAllOp) buildFromBufferedGroup() (bufferedGroupComplete bool) {
+	bg := &o.bufferedGroup
+	// Iterate until either we use up the whole capacity of the output batch or
+	// we complete the buffered group.
+	for {
+		if bg.helper.builderState.left.curSrcStartIdx == o.proberState.lLength {
+			// The output has been fully built from the current left batch.
+			bg.leftBatchDone = true
+		}
+		if bg.leftBatchDone {
+			// The current left batch has been fully processed with regards to
+			// the buffered group.
+			bg.leftBatchDone = false
+			if o.proberState.lIdx < o.proberState.lLength {
+				// The group on the left is finished within the current left
+				// batch.
+				return true
+			}
+			var skipLeftBufferedGroup bool
+			if bg.helper.builderState.numEmittedTotal == bg.helper.numRightTuples {
+				// For INTERSECT ALL joins we build min(# left tuples, # right
+				// tuples), and we have already reached the number of tuples
+				// from the right. Thus, we have to skip all tuples from the
+				// left that are part of the buffered group since they don't
+				// have a match.
+				skipLeftBufferedGroup = true
+			}
+			if skipLeftBufferedGroup {
+				// Keep fetching the next batch from the left input until we
+				// either find the start of the new group or we exhaust the
+				// input.
+				for o.proberState.lIdx == o.proberState.lLength && o.proberState.lLength > 0 {
+					o.continueLeftBufferedGroup()
+				}
+				return true
+			}
+			// Fetch the next batch from the left input and calculate the
+			// boundaries of the buffered group.
+			o.continueLeftBufferedGroup()
+			if o.proberState.lIdx == 0 {
+				return true
+			}
+			bg.helper.prepareForNextLeftBatch(
+				o.proberState.lBatch, bg.leftGroupStartIdx, o.proberState.lIdx,
+			)
 		}
 
-	case mjBuildFromBufferedGroup:
-		willEmit := o.builderState.totalOutCountFromBufferedGroup - o.builderState.alreadyEmittedFromBufferedGroup
-		if o.builderState.outCount+willEmit > o.output.Capacity() {
-			willEmit = o.output.Capacity() - o.builderState.outCount
+		willEmit := bg.helper.canEmit()
+		if o.builderState.outCount+willEmit > o.outputCapacity {
+			willEmit = o.outputCapacity - o.builderState.outCount
 		} else {
-			o.builderState.outFinished = true
+			bg.leftBatchDone = true
+		}
+		if willEmit > 0 && len(o.outputTypes) != 0 {
+			bg.helper.buildFromLeftInput(o.Ctx, o.builderState.outCount)
 		}
 		o.builderState.outCount += willEmit
-		o.builderState.alreadyEmittedFromBufferedGroup += willEmit
-		if o.output.Width() != 0 && willEmit > 0 {
-			o.bufferedGroup.helper.buildFromLeftInput(o.Ctx, outStartIdx)
+		bg.helper.builderState.numEmittedCurLeftBatch += willEmit
+		bg.helper.builderState.numEmittedTotal += willEmit
+		if o.builderState.outCount == o.outputCapacity {
+			return false
 		}
-
-	default:
-		colexecerror.InternalError(errors.AssertionFailedf("unsupported mjBuildFrom %d", o.builderState.buildFrom))
-
 	}
 }
 
@@ -11259,65 +11493,75 @@ func (o *mergeJoinIntersectAllOp) Next() coldata.Batch {
 	o.output, _ = o.unlimitedAllocator.ResetMaybeReallocate(
 		o.outputTypes, o.output, 1 /* minDesiredCapacity */, o.memoryLimit,
 	)
+	o.outputCapacity = o.output.Capacity()
 	o.bufferedGroup.helper.output = o.output
+	o.builderState.outCount = 0
 	for {
 		switch o.state {
 		case mjEntry:
-			o.initProberState()
-
-			if o.nonEmptyBufferedGroup() {
-				o.state = mjFinishBufferedGroup
-				break
+			// If this is the first batch or we're done with the current batch,
+			// get the next batch.
+			if o.proberState.lBatch == nil || (o.proberState.lLength != 0 && o.proberState.lIdx == o.proberState.lLength) {
+				o.proberState.lIdx, o.proberState.lBatch = 0, o.left.source.Next()
+				o.proberState.lLength = o.proberState.lBatch.Length()
+			}
+			if o.proberState.rBatch == nil || (o.proberState.rLength != 0 && o.proberState.rIdx == o.proberState.rLength) {
+				o.proberState.rIdx, o.proberState.rBatch = 0, o.right.source.Next()
+				o.proberState.rLength = o.proberState.rBatch.Length()
 			}
-
 			if o.sourceFinished() {
 				o.state = mjSourceFinished
 				break
 			}
-
 			o.state = mjProbe
+
 		case mjSourceFinished:
-			o.outputReady = true
-			o.builderState.buildFrom = mjBuildFromBatch
-			o.setBuilderSourceToBufferedGroup()
-			o.state = mjBuild
-		case mjFinishBufferedGroup:
-			o.finishProbe()
-			o.setBuilderSourceToBufferedGroup()
-			o.state = mjBuild
+			o.builderState.lGroups = o.builderState.lGroups[:0]
+			o.builderState.rGroups = o.builderState.rGroups[:0]
+			if len(o.builderState.lGroups) == 0 && len(o.builderState.rGroups) == 0 {
+				o.state = mjDone
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
+			}
+			o.state = mjBuildFromBatch
+
 		case mjProbe:
 			o.probe()
-			o.setBuilderSourceToBatch()
-			o.state = mjBuild
-		case mjBuild:
-			o.build()
-
-			if o.builderState.outFinished {
+			o.builderState.lGroups, o.builderState.rGroups = o.groups.getGroups()
+			if len(o.builderState.lGroups) > 0 || len(o.builderState.rGroups) > 0 {
+				o.state = mjBuildFromBatch
+			} else if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
 				o.state = mjEntry
-				o.builderState.outFinished = false
 			}
 
-			if o.outputReady || o.builderState.outCount == o.output.Capacity() {
-				if o.builderState.outCount == 0 {
-					// We have already fully emitted the result of the join, so we
-					// transition to "finished" state.
-					o.state = mjDone
-					continue
-				}
+		case mjBuildFromBatch:
+			o.buildFromBatch()
+			if o.builderState.outCount == o.outputCapacity {
 				o.output.SetLength(o.builderState.outCount)
-				// Reset builder out count.
-				o.builderState.outCount = 0
-				o.outputReady = false
 				return o.output
 			}
-		case mjDone:
-			// Note that resetting of buffered group will close disk queues
-			// (if there are any).
-			if o.bufferedGroup.needToReset {
+			if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
+				o.state = mjEntry
+			}
+
+		case mjBuildFromBufferedGroup:
+			bufferedGroupComplete := o.buildFromBufferedGroup()
+			if bufferedGroupComplete {
 				o.bufferedGroup.helper.Reset(o.Ctx)
-				o.bufferedGroup.needToReset = false
+				o.state = mjEntry
+			}
+			if o.builderState.outCount == o.outputCapacity {
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
 			}
+
+		case mjDone:
 			return coldata.ZeroBatch
+
 		default:
 			colexecerror.InternalError(errors.AssertionFailedf("unexpected merge joiner state in Next: %v", o.state))
 		}
diff --git a/pkg/sql/colexec/colexecjoin/mergejoiner_leftanti.eg.go b/pkg/sql/colexec/colexecjoin/mergejoiner_leftanti.eg.go
index b674d672e283..dbc70f4fa95c 100644
--- a/pkg/sql/colexec/colexecjoin/mergejoiner_leftanti.eg.go
+++ b/pkg/sql/colexec/colexecjoin/mergejoiner_leftanti.eg.go
@@ -48,13 +48,13 @@ var _ colexecop.Operator = &mergeJoinLeftAntiOp{}
 func (o *mergeJoinLeftAntiOp) probeBodyLSeltrueRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -77,32 +77,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -200,19 +201,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -262,7 +263,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -272,10 +273,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -297,32 +303,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -396,19 +403,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -450,7 +457,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -460,10 +467,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -485,32 +497,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -584,19 +597,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -638,7 +651,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -648,10 +661,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -672,32 +690,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -804,19 +823,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -869,7 +888,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -879,10 +898,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -900,32 +924,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1032,19 +1057,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -1097,7 +1122,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -1107,10 +1132,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -1129,32 +1159,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1261,19 +1292,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -1326,7 +1357,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -1336,10 +1367,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -1361,32 +1397,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1517,19 +1554,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -1590,7 +1627,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -1600,10 +1637,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -1625,32 +1667,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1745,19 +1788,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -1806,7 +1849,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -1816,10 +1859,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -1841,32 +1889,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1940,19 +1989,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -1994,7 +2043,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -2004,10 +2053,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -2029,32 +2083,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2146,19 +2201,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -2206,7 +2261,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -2216,10 +2271,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -2241,32 +2301,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2346,19 +2407,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -2402,7 +2463,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -2412,10 +2473,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -2430,13 +2496,13 @@ EqLoop:
 func (o *mergeJoinLeftAntiOp) probeBodyLSeltrueRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -2459,32 +2525,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2582,19 +2649,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -2644,7 +2711,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -2654,10 +2721,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -2679,32 +2751,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2778,19 +2851,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -2832,7 +2905,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -2842,10 +2915,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -2867,32 +2945,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2966,19 +3045,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -3020,7 +3099,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -3030,10 +3109,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -3054,32 +3138,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3186,19 +3271,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -3251,7 +3336,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -3261,10 +3346,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -3282,32 +3372,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3414,19 +3505,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -3479,7 +3570,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -3489,10 +3580,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -3511,32 +3607,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3643,19 +3740,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -3708,7 +3805,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -3718,10 +3815,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -3743,32 +3845,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3899,19 +4002,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -3972,7 +4075,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -3982,10 +4085,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -4007,32 +4115,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4127,19 +4236,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -4188,7 +4297,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -4198,10 +4307,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -4223,32 +4337,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4322,19 +4437,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -4376,7 +4491,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -4386,10 +4501,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -4411,32 +4531,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4528,19 +4649,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -4588,7 +4709,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -4598,10 +4719,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -4623,32 +4749,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4728,19 +4855,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -4784,7 +4911,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -4794,10 +4921,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -4812,13 +4944,13 @@ EqLoop:
 func (o *mergeJoinLeftAntiOp) probeBodyLSelfalseRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -4841,32 +4973,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4964,19 +5097,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -5026,7 +5159,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -5036,10 +5169,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -5061,32 +5199,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5160,19 +5299,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -5214,7 +5353,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -5224,10 +5363,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -5249,32 +5393,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5348,19 +5493,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -5402,7 +5547,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -5412,10 +5557,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -5436,32 +5586,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5568,19 +5719,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -5633,7 +5784,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -5643,10 +5794,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -5664,32 +5820,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5796,19 +5953,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -5861,7 +6018,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -5871,10 +6028,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -5893,32 +6055,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6025,19 +6188,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -6090,7 +6253,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -6100,10 +6263,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -6125,32 +6293,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6281,19 +6450,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -6354,7 +6523,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -6364,10 +6533,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -6389,32 +6563,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6509,19 +6684,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -6570,7 +6745,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -6580,10 +6755,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -6605,32 +6785,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6704,19 +6885,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -6758,7 +6939,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -6768,10 +6949,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -6793,32 +6979,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6910,19 +7097,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -6970,7 +7157,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -6980,10 +7167,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -7005,32 +7197,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7110,19 +7303,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -7166,7 +7359,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -7176,10 +7369,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -7194,13 +7392,13 @@ EqLoop:
 func (o *mergeJoinLeftAntiOp) probeBodyLSelfalseRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -7223,32 +7421,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7346,19 +7545,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -7408,7 +7607,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -7418,10 +7617,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -7443,32 +7647,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7542,19 +7747,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -7596,7 +7801,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -7606,10 +7811,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -7631,32 +7841,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7730,19 +7941,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -7784,7 +7995,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -7794,10 +8005,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -7818,32 +8034,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7950,19 +8167,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -8015,7 +8232,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -8025,10 +8242,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -8046,32 +8268,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8178,19 +8401,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -8243,7 +8466,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -8253,10 +8476,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -8275,32 +8503,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8407,19 +8636,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -8472,7 +8701,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -8482,10 +8711,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -8507,32 +8741,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8663,19 +8898,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -8736,7 +8971,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -8746,10 +8981,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -8771,32 +9011,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8891,19 +9132,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -8952,7 +9193,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -8962,10 +9203,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -8987,32 +9233,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -9086,19 +9333,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -9140,7 +9387,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -9150,10 +9397,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -9175,32 +9427,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -9292,19 +9545,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -9352,7 +9605,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -9362,10 +9615,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -9387,32 +9645,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -9492,19 +9751,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With LEFT ANTI join, we are only interested in unmatched tuples
@@ -9548,7 +9807,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -9558,10 +9817,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -9602,7 +9866,6 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[:len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		LeftColLoop:
 			for colIdx := range input.sourceTypes {
@@ -9650,8 +9913,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9715,8 +9978,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9780,8 +10043,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9844,8 +10107,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9905,8 +10168,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9967,8 +10230,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10032,8 +10295,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10097,8 +10360,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10162,8 +10425,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10227,8 +10490,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10292,8 +10555,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10366,8 +10629,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10430,8 +10693,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10494,8 +10757,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10557,8 +10820,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10617,8 +10880,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10678,8 +10941,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10742,8 +11005,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10806,8 +11069,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10870,8 +11133,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10934,8 +11197,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10998,8 +11261,8 @@ func (o *mergeJoinLeftAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11070,7 +11333,6 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[colOffset:colOffset+len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		RightColLoop:
 			for colIdx := range input.sourceTypes {
@@ -11106,8 +11368,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11173,8 +11435,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11240,8 +11502,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11306,8 +11568,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11369,8 +11631,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11433,8 +11695,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11500,8 +11762,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11567,8 +11829,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11634,8 +11896,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11701,8 +11963,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11768,8 +12030,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11845,8 +12107,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11912,8 +12174,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11979,8 +12241,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12045,8 +12307,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12108,8 +12370,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12172,8 +12434,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12239,8 +12501,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12306,8 +12568,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12373,8 +12635,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12440,8 +12702,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12507,8 +12769,8 @@ func (o *mergeJoinLeftAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12592,20 +12854,6 @@ func (o *mergeJoinLeftAntiOp) probe() {
 	}
 }
 
-// setBuilderSourceToBufferedGroup sets up the builder state to use the
-// buffered group.
-func (o *mergeJoinLeftAntiOp) setBuilderSourceToBufferedGroup() {
-	o.builderState.buildFrom = mjBuildFromBufferedGroup
-	o.bufferedGroup.helper.setupBuilder()
-	o.builderState.totalOutCountFromBufferedGroup = o.bufferedGroup.helper.calculateOutputCount()
-	o.builderState.alreadyEmittedFromBufferedGroup = 0
-
-	// We cannot yet reset the buffered groups because the builder will be taking
-	// input from them. The actual reset will take place on the next call to
-	// initProberState().
-	o.bufferedGroup.needToReset = true
-}
-
 // exhaustLeftSource sets up the builder to process any remaining tuples from
 // the left source. It should only be called when the right source has been
 // exhausted.
@@ -12632,14 +12880,13 @@ func (o *mergeJoinLeftAntiOp) exhaustRightSource() {
 	// ignored in all joins except for RIGHT OUTER and FULL OUTER.
 }
 
-// calculateOutputCount uses the toBuild field of each group and the output
-// batch size to determine the output count. Note that as soon as a group is
+// numBuiltFromBatch uses the toBuild field of each group and the output
+// capacity to determine the output count. Note that as soon as a group is
 // materialized partially or fully to output, its toBuild field is updated
-// accordingly.
-func (o *mergeJoinLeftAntiOp) calculateOutputCount(groups []group) int {
-	count := o.builderState.outCount
-	outputCapacity := o.output.Capacity()
-
+// accordingly. The number of tuples that will be built from batch during the
+// current iteration is returned.
+func (o *mergeJoinLeftAntiOp) numBuiltFromBatch(groups []group) (numBuilt int) {
+	outCount := o.builderState.outCount
 	for i := 0; i < len(groups); i++ {
 		if !groups[i].unmatched {
 			// "Matched" groups are not outputted in LEFT ANTI, RIGHT ANTI,
@@ -12647,51 +12894,118 @@ func (o *mergeJoinLeftAntiOp) calculateOutputCount(groups []group) int {
 			// they do not contribute to the output count.
 			continue
 		}
-		count += groups[i].toBuild
+		outCount += groups[i].toBuild
 		groups[i].toBuild = 0
-		if count > o.output.Capacity() {
-			groups[i].toBuild = count - outputCapacity
-			count = outputCapacity
-			return count
+		if outCount > o.outputCapacity {
+			groups[i].toBuild = outCount - o.outputCapacity
+			return o.outputCapacity - o.builderState.outCount
 		}
 	}
-	o.builderState.outFinished = true
-	return count
+	return outCount - o.builderState.outCount
 }
 
-// build creates the cross product, and writes it to the output member.
-func (o *mergeJoinLeftAntiOp) build() {
+// buildFromBatch builds as many output rows as possible from the groups that
+// were complete in the probing batches. New rows are put starting at
+// o.builderState.outCount position until either the capacity is reached or all
+// groups are processed.
+func (o *mergeJoinLeftAntiOp) buildFromBatch() {
 	outStartIdx := o.builderState.outCount
-	switch o.builderState.buildFrom {
-	case mjBuildFromBatch:
-		o.builderState.outCount = o.calculateOutputCount(o.builderState.lGroups)
-		if o.output.Width() != 0 && o.builderState.outCount > outStartIdx {
-			// We will be actually building the output if we have columns in the output
-			// batch (meaning that we're not doing query like 'SELECT count(*) ...')
-			// and when builderState.outCount has increased (meaning that we have
-			// something to build).
-			colOffsetForRightGroups := 0
-			o.buildLeftGroupsFromBatch(o.builderState.lGroups, &o.left, o.proberState.lBatch, outStartIdx)
-			colOffsetForRightGroups = len(o.left.sourceTypes)
-			_ = colOffsetForRightGroups
+	numBuilt := o.numBuiltFromBatch(o.builderState.lGroups)
+	o.builderState.outCount += numBuilt
+	if numBuilt > 0 && len(o.outputTypes) != 0 {
+		// We will be actually building the output if we have columns in the output
+		// batch (meaning that we're not doing query like 'SELECT count(*) ...')
+		// and when builderState.outCount has increased (meaning that we have
+		// something to build).
+		colOffsetForRightGroups := 0
+		o.buildLeftGroupsFromBatch(o.builderState.lGroups, &o.left, o.proberState.lBatch, outStartIdx)
+		colOffsetForRightGroups = len(o.left.sourceTypes)
+		_ = colOffsetForRightGroups
+	}
+}
+
+// transitionIntoBuildingFromBufferedGroup should be called once we have
+// non-empty right buffered group in order to setup the buffered group builder.
+// It will complete the right buffered group (meaning it'll read all batches
+// from the right input until either the new group is found or the input is
+// exhausted).
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinLeftAntiOp) transitionIntoBuildingFromBufferedGroup() {
+	if o.proberState.rIdx == o.proberState.rLength {
+		// The right buffered group might extend into the next batch, so we have
+		// to complete it first.
+		o.completeRightBufferedGroup()
+	}
+
+	o.bufferedGroup.helper.setupLeftBuilder()
+
+	startIdx := o.bufferedGroup.leftGroupStartIdx
+
+	o.bufferedGroup.helper.prepareForNextLeftBatch(o.proberState.lBatch, startIdx, o.proberState.lIdx)
+	o.state = mjBuildFromBufferedGroup
+}
+
+// buildFromBufferedGroup builds the output based on the current buffered group
+// and puts new tuples starting at position b.builderState.outCount. It returns
+// true once the output for the buffered group has been fully populated.
+// It is assumed that transitionIntoBuildingFromBufferedGroup has been called.
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinLeftAntiOp) buildFromBufferedGroup() (bufferedGroupComplete bool) {
+	bg := &o.bufferedGroup
+	// Iterate until either we use up the whole capacity of the output batch or
+	// we complete the buffered group.
+	for {
+		if bg.helper.builderState.left.curSrcStartIdx == o.proberState.lLength {
+			// The output has been fully built from the current left batch.
+			bg.leftBatchDone = true
+		}
+		if bg.leftBatchDone {
+			// The current left batch has been fully processed with regards to
+			// the buffered group.
+			bg.leftBatchDone = false
+			if o.proberState.lIdx < o.proberState.lLength {
+				// The group on the left is finished within the current left
+				// batch.
+				return true
+			}
+			var skipLeftBufferedGroup bool
+			if skipLeftBufferedGroup {
+				// Keep fetching the next batch from the left input until we
+				// either find the start of the new group or we exhaust the
+				// input.
+				for o.proberState.lIdx == o.proberState.lLength && o.proberState.lLength > 0 {
+					o.continueLeftBufferedGroup()
+				}
+				return true
+			}
+			// Fetch the next batch from the left input and calculate the
+			// boundaries of the buffered group.
+			o.continueLeftBufferedGroup()
+			if o.proberState.lIdx == 0 {
+				return true
+			}
+			bg.helper.prepareForNextLeftBatch(
+				o.proberState.lBatch, bg.leftGroupStartIdx, o.proberState.lIdx,
+			)
 		}
 
-	case mjBuildFromBufferedGroup:
-		willEmit := o.builderState.totalOutCountFromBufferedGroup - o.builderState.alreadyEmittedFromBufferedGroup
-		if o.builderState.outCount+willEmit > o.output.Capacity() {
-			willEmit = o.output.Capacity() - o.builderState.outCount
+		willEmit := bg.helper.canEmit()
+		if o.builderState.outCount+willEmit > o.outputCapacity {
+			willEmit = o.outputCapacity - o.builderState.outCount
 		} else {
-			o.builderState.outFinished = true
+			bg.leftBatchDone = true
+		}
+		if willEmit > 0 && len(o.outputTypes) != 0 {
+			bg.helper.buildFromLeftInput(o.Ctx, o.builderState.outCount)
 		}
 		o.builderState.outCount += willEmit
-		o.builderState.alreadyEmittedFromBufferedGroup += willEmit
-		if o.output.Width() != 0 && willEmit > 0 {
-			o.bufferedGroup.helper.buildFromLeftInput(o.Ctx, outStartIdx)
+		bg.helper.builderState.numEmittedCurLeftBatch += willEmit
+		bg.helper.builderState.numEmittedTotal += willEmit
+		if o.builderState.outCount == o.outputCapacity {
+			return false
 		}
-
-	default:
-		colexecerror.InternalError(errors.AssertionFailedf("unsupported mjBuildFrom %d", o.builderState.buildFrom))
-
 	}
 }
 
@@ -12699,30 +13013,29 @@ func (o *mergeJoinLeftAntiOp) Next() coldata.Batch {
 	o.output, _ = o.unlimitedAllocator.ResetMaybeReallocate(
 		o.outputTypes, o.output, 1 /* minDesiredCapacity */, o.memoryLimit,
 	)
+	o.outputCapacity = o.output.Capacity()
 	o.bufferedGroup.helper.output = o.output
+	o.builderState.outCount = 0
 	for {
 		switch o.state {
 		case mjEntry:
-			o.initProberState()
-
-			if o.nonEmptyBufferedGroup() {
-				o.state = mjFinishBufferedGroup
-				break
+			// If this is the first batch or we're done with the current batch,
+			// get the next batch.
+			if o.proberState.lBatch == nil || (o.proberState.lLength != 0 && o.proberState.lIdx == o.proberState.lLength) {
+				o.proberState.lIdx, o.proberState.lBatch = 0, o.left.source.Next()
+				o.proberState.lLength = o.proberState.lBatch.Length()
+			}
+			if o.proberState.rBatch == nil || (o.proberState.rLength != 0 && o.proberState.rIdx == o.proberState.rLength) {
+				o.proberState.rIdx, o.proberState.rBatch = 0, o.right.source.Next()
+				o.proberState.rLength = o.proberState.rBatch.Length()
 			}
-
 			if o.sourceFinished() {
 				o.state = mjSourceFinished
 				break
 			}
-
 			o.state = mjProbe
+
 		case mjSourceFinished:
-			o.outputReady = true
-			o.builderState.buildFrom = mjBuildFromBatch
-			// Next, we need to make sure that builder state is set up for a case when
-			// neither exhaustLeftSource nor exhaustRightSource is called below. In such
-			// scenario the merge joiner is done, so it'll be outputting zero-length
-			// batches from now on.
 			o.builderState.lGroups = o.builderState.lGroups[:0]
 			o.builderState.rGroups = o.builderState.rGroups[:0]
 			// At least one of the sources is finished. If it was the right one,
@@ -12731,49 +13044,51 @@ func (o *mergeJoinLeftAntiOp) Next() coldata.Batch {
 			// finished, then there is nothing left to do.
 			if o.proberState.lIdx < o.proberState.lLength {
 				o.exhaustLeftSource()
-				// We unset o.outputReady here because we want to put as many unmatched
-				// tuples from the left into the output batch. Once outCount reaches the
-				// desired output batch size, the output will be returned.
-				o.outputReady = false
 			}
-			o.state = mjBuild
-		case mjFinishBufferedGroup:
-			o.finishProbe()
-			o.setBuilderSourceToBufferedGroup()
-			o.state = mjBuild
+			if len(o.builderState.lGroups) == 0 && len(o.builderState.rGroups) == 0 {
+				o.state = mjDone
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
+			}
+			o.state = mjBuildFromBatch
+
 		case mjProbe:
 			o.probe()
-			o.setBuilderSourceToBatch()
-			o.state = mjBuild
-		case mjBuild:
-			o.build()
-
-			if o.builderState.outFinished {
+			o.builderState.lGroups, o.builderState.rGroups = o.groups.getGroups()
+			if len(o.builderState.lGroups) > 0 || len(o.builderState.rGroups) > 0 {
+				o.state = mjBuildFromBatch
+			} else if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
 				o.state = mjEntry
-				o.builderState.outFinished = false
 			}
 
-			if o.outputReady || o.builderState.outCount == o.output.Capacity() {
-				if o.builderState.outCount == 0 {
-					// We have already fully emitted the result of the join, so we
-					// transition to "finished" state.
-					o.state = mjDone
-					continue
-				}
+		case mjBuildFromBatch:
+			o.buildFromBatch()
+			if o.builderState.outCount == o.outputCapacity {
 				o.output.SetLength(o.builderState.outCount)
-				// Reset builder out count.
-				o.builderState.outCount = 0
-				o.outputReady = false
 				return o.output
 			}
-		case mjDone:
-			// Note that resetting of buffered group will close disk queues
-			// (if there are any).
-			if o.bufferedGroup.needToReset {
+			if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
+				o.state = mjEntry
+			}
+
+		case mjBuildFromBufferedGroup:
+			bufferedGroupComplete := o.buildFromBufferedGroup()
+			if bufferedGroupComplete {
 				o.bufferedGroup.helper.Reset(o.Ctx)
-				o.bufferedGroup.needToReset = false
+				o.state = mjEntry
 			}
+			if o.builderState.outCount == o.outputCapacity {
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
+			}
+
+		case mjDone:
 			return coldata.ZeroBatch
+
 		default:
 			colexecerror.InternalError(errors.AssertionFailedf("unexpected merge joiner state in Next: %v", o.state))
 		}
diff --git a/pkg/sql/colexec/colexecjoin/mergejoiner_leftouter.eg.go b/pkg/sql/colexec/colexecjoin/mergejoiner_leftouter.eg.go
index fd05374386f7..f8d6f065d6bd 100644
--- a/pkg/sql/colexec/colexecjoin/mergejoiner_leftouter.eg.go
+++ b/pkg/sql/colexec/colexecjoin/mergejoiner_leftouter.eg.go
@@ -48,13 +48,13 @@ var _ colexecop.Operator = &mergeJoinLeftOuterOp{}
 func (o *mergeJoinLeftOuterOp) probeBodyLSeltrueRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -77,32 +77,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -200,19 +201,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -263,7 +264,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -273,10 +274,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -298,32 +304,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -397,19 +404,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -452,7 +459,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -462,10 +469,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -487,32 +499,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -586,19 +599,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -641,7 +654,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -651,10 +664,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -675,32 +693,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -807,19 +826,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -873,7 +892,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -883,10 +902,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -904,32 +928,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1036,19 +1061,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -1102,7 +1127,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -1112,10 +1137,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -1134,32 +1164,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1266,19 +1297,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -1332,7 +1363,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -1342,10 +1373,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -1367,32 +1403,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1523,19 +1560,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -1597,7 +1634,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -1607,10 +1644,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -1632,32 +1674,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1752,19 +1795,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -1814,7 +1857,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -1824,10 +1867,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -1849,32 +1897,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1948,19 +1997,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -2003,7 +2052,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -2013,10 +2062,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -2038,32 +2092,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2155,19 +2210,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -2216,7 +2271,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -2226,10 +2281,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -2251,32 +2311,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2356,19 +2417,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -2413,7 +2474,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -2423,10 +2484,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -2441,13 +2507,13 @@ EqLoop:
 func (o *mergeJoinLeftOuterOp) probeBodyLSeltrueRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -2470,32 +2536,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2593,19 +2660,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -2656,7 +2723,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -2666,10 +2733,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -2691,32 +2763,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2790,19 +2863,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -2845,7 +2918,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -2855,10 +2928,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -2880,32 +2958,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2979,19 +3058,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3034,7 +3113,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -3044,10 +3123,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -3068,32 +3152,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3200,19 +3285,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3266,7 +3351,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -3276,10 +3361,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -3297,32 +3387,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3429,19 +3520,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3495,7 +3586,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -3505,10 +3596,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -3527,32 +3623,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3659,19 +3756,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3725,7 +3822,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -3735,10 +3832,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -3760,32 +3862,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3916,19 +4019,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3990,7 +4093,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -4000,10 +4103,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -4025,32 +4133,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4145,19 +4254,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -4207,7 +4316,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -4217,10 +4326,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -4242,32 +4356,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4341,19 +4456,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -4396,7 +4511,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -4406,10 +4521,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -4431,32 +4551,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4548,19 +4669,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -4609,7 +4730,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -4619,10 +4740,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -4644,32 +4770,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4749,19 +4876,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -4806,7 +4933,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -4816,10 +4943,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -4834,13 +4966,13 @@ EqLoop:
 func (o *mergeJoinLeftOuterOp) probeBodyLSelfalseRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -4863,32 +4995,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4986,19 +5119,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5049,7 +5182,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -5059,10 +5192,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -5084,32 +5222,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5183,19 +5322,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5238,7 +5377,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -5248,10 +5387,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -5273,32 +5417,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5372,19 +5517,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5427,7 +5572,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -5437,10 +5582,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -5461,32 +5611,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5593,19 +5744,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5659,7 +5810,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -5669,10 +5820,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -5690,32 +5846,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5822,19 +5979,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5888,7 +6045,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -5898,10 +6055,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -5920,32 +6082,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6052,19 +6215,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -6118,7 +6281,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -6128,10 +6291,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -6153,32 +6321,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6309,19 +6478,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -6383,7 +6552,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -6393,10 +6562,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -6418,32 +6592,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6538,19 +6713,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -6600,7 +6775,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -6610,10 +6785,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -6635,32 +6815,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6734,19 +6915,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -6789,7 +6970,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -6799,10 +6980,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -6824,32 +7010,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6941,19 +7128,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -7002,7 +7189,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -7012,10 +7199,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -7037,32 +7229,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7142,19 +7335,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -7199,7 +7392,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -7209,10 +7402,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -7227,13 +7425,13 @@ EqLoop:
 func (o *mergeJoinLeftOuterOp) probeBodyLSelfalseRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -7256,32 +7454,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7379,19 +7578,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -7442,7 +7641,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -7452,10 +7651,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -7477,32 +7681,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7576,19 +7781,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -7631,7 +7836,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -7641,10 +7846,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -7666,32 +7876,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7765,19 +7976,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -7820,7 +8031,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -7830,10 +8041,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -7854,32 +8070,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7986,19 +8203,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -8052,7 +8269,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -8062,10 +8279,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -8083,32 +8305,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8215,19 +8438,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -8281,7 +8504,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -8291,10 +8514,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -8313,32 +8541,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8445,19 +8674,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -8511,7 +8740,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -8521,10 +8750,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -8546,32 +8780,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8702,19 +8937,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -8776,7 +9011,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -8786,10 +9021,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -8811,32 +9051,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8931,19 +9172,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -8993,7 +9234,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -9003,10 +9244,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -9028,32 +9274,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -9127,19 +9374,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -9182,7 +9429,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -9192,10 +9439,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -9217,32 +9469,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -9334,19 +9587,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -9395,7 +9648,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -9405,10 +9658,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -9430,32 +9688,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if lGroup.unmatched {
-						if curLIdx+1 != curLEndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-						curLIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curLIdx >= o.proberState.lIdx {
+							o.proberState.lIdx = curLIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
 							o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -9535,19 +9794,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -9592,7 +9851,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the left group will not get a match, so each one of
@@ -9602,10 +9861,15 @@ EqLoop:
 							curLIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -9646,7 +9910,6 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[:len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		LeftColLoop:
 			for colIdx := range input.sourceTypes {
@@ -9691,8 +9954,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9753,8 +10016,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9815,8 +10078,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9876,8 +10139,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9934,8 +10197,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9993,8 +10256,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10055,8 +10318,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10117,8 +10380,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10179,8 +10442,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10241,8 +10504,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10303,8 +10566,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10374,8 +10637,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10435,8 +10698,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10496,8 +10759,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10556,8 +10819,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10613,8 +10876,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10671,8 +10934,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10732,8 +10995,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10793,8 +11056,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10854,8 +11117,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10915,8 +11178,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10976,8 +11239,8 @@ func (o *mergeJoinLeftOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11048,7 +11311,6 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[colOffset:colOffset+len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		RightColLoop:
 			for colIdx := range input.sourceTypes {
@@ -11084,8 +11346,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -11153,8 +11415,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -11222,8 +11484,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -11290,8 +11552,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -11355,8 +11617,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -11421,8 +11683,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -11490,8 +11752,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -11559,8 +11821,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -11628,8 +11890,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -11697,8 +11959,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -11766,8 +12028,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -11845,8 +12107,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -11914,8 +12176,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -11983,8 +12245,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -12051,8 +12313,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -12116,8 +12378,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -12182,8 +12444,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -12251,8 +12513,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -12320,8 +12582,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -12389,8 +12651,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -12458,8 +12720,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -12527,8 +12789,8 @@ func (o *mergeJoinLeftOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if rightGroup.nullGroup {
@@ -12614,20 +12876,6 @@ func (o *mergeJoinLeftOuterOp) probe() {
 	}
 }
 
-// setBuilderSourceToBufferedGroup sets up the builder state to use the
-// buffered group.
-func (o *mergeJoinLeftOuterOp) setBuilderSourceToBufferedGroup() {
-	o.builderState.buildFrom = mjBuildFromBufferedGroup
-	o.bufferedGroup.helper.setupBuilder()
-	o.builderState.totalOutCountFromBufferedGroup = o.bufferedGroup.helper.calculateOutputCount()
-	o.builderState.alreadyEmittedFromBufferedGroup = 0
-
-	// We cannot yet reset the buffered groups because the builder will be taking
-	// input from them. The actual reset will take place on the next call to
-	// initProberState().
-	o.bufferedGroup.needToReset = true
-}
-
 // exhaustLeftSource sets up the builder to process any remaining tuples from
 // the left source. It should only be called when the right source has been
 // exhausted.
@@ -12662,62 +12910,128 @@ func (o *mergeJoinLeftOuterOp) exhaustRightSource() {
 	// ignored in all joins except for RIGHT OUTER and FULL OUTER.
 }
 
-// calculateOutputCount uses the toBuild field of each group and the output
-// batch size to determine the output count. Note that as soon as a group is
+// numBuiltFromBatch uses the toBuild field of each group and the output
+// capacity to determine the output count. Note that as soon as a group is
 // materialized partially or fully to output, its toBuild field is updated
-// accordingly.
-func (o *mergeJoinLeftOuterOp) calculateOutputCount(groups []group) int {
-	count := o.builderState.outCount
-	outputCapacity := o.output.Capacity()
-
+// accordingly. The number of tuples that will be built from batch during the
+// current iteration is returned.
+func (o *mergeJoinLeftOuterOp) numBuiltFromBatch(groups []group) (numBuilt int) {
+	outCount := o.builderState.outCount
 	for i := 0; i < len(groups); i++ {
-		count += groups[i].toBuild
+		outCount += groups[i].toBuild
 		groups[i].toBuild = 0
-		if count > o.output.Capacity() {
-			groups[i].toBuild = count - outputCapacity
-			count = outputCapacity
-			return count
+		if outCount > o.outputCapacity {
+			groups[i].toBuild = outCount - o.outputCapacity
+			return o.outputCapacity - o.builderState.outCount
 		}
 	}
-	o.builderState.outFinished = true
-	return count
+	return outCount - o.builderState.outCount
 }
 
-// build creates the cross product, and writes it to the output member.
-func (o *mergeJoinLeftOuterOp) build() {
+// buildFromBatch builds as many output rows as possible from the groups that
+// were complete in the probing batches. New rows are put starting at
+// o.builderState.outCount position until either the capacity is reached or all
+// groups are processed.
+func (o *mergeJoinLeftOuterOp) buildFromBatch() {
 	outStartIdx := o.builderState.outCount
-	switch o.builderState.buildFrom {
-	case mjBuildFromBatch:
-		o.builderState.outCount = o.calculateOutputCount(o.builderState.lGroups)
-		if o.output.Width() != 0 && o.builderState.outCount > outStartIdx {
-			// We will be actually building the output if we have columns in the output
-			// batch (meaning that we're not doing query like 'SELECT count(*) ...')
-			// and when builderState.outCount has increased (meaning that we have
-			// something to build).
-			colOffsetForRightGroups := 0
-			o.buildLeftGroupsFromBatch(o.builderState.lGroups, &o.left, o.proberState.lBatch, outStartIdx)
-			colOffsetForRightGroups = len(o.left.sourceTypes)
-			_ = colOffsetForRightGroups
-			o.buildRightGroupsFromBatch(o.builderState.rGroups, colOffsetForRightGroups, &o.right, o.proberState.rBatch, outStartIdx)
+	numBuilt := o.numBuiltFromBatch(o.builderState.lGroups)
+	o.builderState.outCount += numBuilt
+	if numBuilt > 0 && len(o.outputTypes) != 0 {
+		// We will be actually building the output if we have columns in the output
+		// batch (meaning that we're not doing query like 'SELECT count(*) ...')
+		// and when builderState.outCount has increased (meaning that we have
+		// something to build).
+		colOffsetForRightGroups := 0
+		o.buildLeftGroupsFromBatch(o.builderState.lGroups, &o.left, o.proberState.lBatch, outStartIdx)
+		colOffsetForRightGroups = len(o.left.sourceTypes)
+		_ = colOffsetForRightGroups
+		o.buildRightGroupsFromBatch(o.builderState.rGroups, colOffsetForRightGroups, &o.right, o.proberState.rBatch, outStartIdx)
+	}
+}
+
+// transitionIntoBuildingFromBufferedGroup should be called once we have
+// non-empty right buffered group in order to setup the buffered group builder.
+// It will complete the right buffered group (meaning it'll read all batches
+// from the right input until either the new group is found or the input is
+// exhausted).
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinLeftOuterOp) transitionIntoBuildingFromBufferedGroup() {
+	if o.proberState.rIdx == o.proberState.rLength {
+		// The right buffered group might extend into the next batch, so we have
+		// to complete it first.
+		o.completeRightBufferedGroup()
+	}
+
+	o.bufferedGroup.helper.setupLeftBuilder()
+
+	startIdx := o.bufferedGroup.leftGroupStartIdx
+
+	o.bufferedGroup.helper.prepareForNextLeftBatch(o.proberState.lBatch, startIdx, o.proberState.lIdx)
+	o.state = mjBuildFromBufferedGroup
+}
+
+// buildFromBufferedGroup builds the output based on the current buffered group
+// and puts new tuples starting at position b.builderState.outCount. It returns
+// true once the output for the buffered group has been fully populated.
+// It is assumed that transitionIntoBuildingFromBufferedGroup has been called.
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinLeftOuterOp) buildFromBufferedGroup() (bufferedGroupComplete bool) {
+	bg := &o.bufferedGroup
+	// Iterate until either we use up the whole capacity of the output batch or
+	// we complete the buffered group.
+	for {
+		if bg.helper.builderState.left.curSrcStartIdx == o.proberState.lLength {
+			// The output has been fully built from the current left batch.
+			bg.leftBatchDone = true
+		}
+		if bg.leftBatchDone {
+			// The current left batch has been fully processed with regards to
+			// the buffered group.
+			bg.leftBatchDone = false
+			if o.proberState.lIdx < o.proberState.lLength {
+				// The group on the left is finished within the current left
+				// batch.
+				return true
+			}
+			var skipLeftBufferedGroup bool
+			if skipLeftBufferedGroup {
+				// Keep fetching the next batch from the left input until we
+				// either find the start of the new group or we exhaust the
+				// input.
+				for o.proberState.lIdx == o.proberState.lLength && o.proberState.lLength > 0 {
+					o.continueLeftBufferedGroup()
+				}
+				return true
+			}
+			// Fetch the next batch from the left input and calculate the
+			// boundaries of the buffered group.
+			o.continueLeftBufferedGroup()
+			if o.proberState.lIdx == 0 {
+				return true
+			}
+			bg.helper.prepareForNextLeftBatch(
+				o.proberState.lBatch, bg.leftGroupStartIdx, o.proberState.lIdx,
+			)
 		}
 
-	case mjBuildFromBufferedGroup:
-		willEmit := o.builderState.totalOutCountFromBufferedGroup - o.builderState.alreadyEmittedFromBufferedGroup
-		if o.builderState.outCount+willEmit > o.output.Capacity() {
-			willEmit = o.output.Capacity() - o.builderState.outCount
+		willEmit := bg.helper.canEmit()
+		if o.builderState.outCount+willEmit > o.outputCapacity {
+			willEmit = o.outputCapacity - o.builderState.outCount
 		} else {
-			o.builderState.outFinished = true
+			bg.leftBatchDone = true
+		}
+		if willEmit > 0 && len(o.outputTypes) != 0 {
+			bg.helper.buildFromLeftInput(o.Ctx, o.builderState.outCount)
+			bg.helper.buildFromRightInput(o.Ctx, o.builderState.outCount)
 		}
 		o.builderState.outCount += willEmit
-		o.builderState.alreadyEmittedFromBufferedGroup += willEmit
-		if o.output.Width() != 0 && willEmit > 0 {
-			o.bufferedGroup.helper.buildFromLeftInput(o.Ctx, outStartIdx)
-			o.bufferedGroup.helper.buildFromRightInput(o.Ctx, outStartIdx)
+		bg.helper.builderState.numEmittedCurLeftBatch += willEmit
+		bg.helper.builderState.numEmittedTotal += willEmit
+		if o.builderState.outCount == o.outputCapacity {
+			return false
 		}
-
-	default:
-		colexecerror.InternalError(errors.AssertionFailedf("unsupported mjBuildFrom %d", o.builderState.buildFrom))
-
 	}
 }
 
@@ -12725,30 +13039,29 @@ func (o *mergeJoinLeftOuterOp) Next() coldata.Batch {
 	o.output, _ = o.unlimitedAllocator.ResetMaybeReallocate(
 		o.outputTypes, o.output, 1 /* minDesiredCapacity */, o.memoryLimit,
 	)
+	o.outputCapacity = o.output.Capacity()
 	o.bufferedGroup.helper.output = o.output
+	o.builderState.outCount = 0
 	for {
 		switch o.state {
 		case mjEntry:
-			o.initProberState()
-
-			if o.nonEmptyBufferedGroup() {
-				o.state = mjFinishBufferedGroup
-				break
+			// If this is the first batch or we're done with the current batch,
+			// get the next batch.
+			if o.proberState.lBatch == nil || (o.proberState.lLength != 0 && o.proberState.lIdx == o.proberState.lLength) {
+				o.proberState.lIdx, o.proberState.lBatch = 0, o.left.source.Next()
+				o.proberState.lLength = o.proberState.lBatch.Length()
+			}
+			if o.proberState.rBatch == nil || (o.proberState.rLength != 0 && o.proberState.rIdx == o.proberState.rLength) {
+				o.proberState.rIdx, o.proberState.rBatch = 0, o.right.source.Next()
+				o.proberState.rLength = o.proberState.rBatch.Length()
 			}
-
 			if o.sourceFinished() {
 				o.state = mjSourceFinished
 				break
 			}
-
 			o.state = mjProbe
+
 		case mjSourceFinished:
-			o.outputReady = true
-			o.builderState.buildFrom = mjBuildFromBatch
-			// Next, we need to make sure that builder state is set up for a case when
-			// neither exhaustLeftSource nor exhaustRightSource is called below. In such
-			// scenario the merge joiner is done, so it'll be outputting zero-length
-			// batches from now on.
 			o.builderState.lGroups = o.builderState.lGroups[:0]
 			o.builderState.rGroups = o.builderState.rGroups[:0]
 			// At least one of the sources is finished. If it was the right one,
@@ -12757,49 +13070,51 @@ func (o *mergeJoinLeftOuterOp) Next() coldata.Batch {
 			// finished, then there is nothing left to do.
 			if o.proberState.lIdx < o.proberState.lLength {
 				o.exhaustLeftSource()
-				// We unset o.outputReady here because we want to put as many unmatched
-				// tuples from the left into the output batch. Once outCount reaches the
-				// desired output batch size, the output will be returned.
-				o.outputReady = false
 			}
-			o.state = mjBuild
-		case mjFinishBufferedGroup:
-			o.finishProbe()
-			o.setBuilderSourceToBufferedGroup()
-			o.state = mjBuild
+			if len(o.builderState.lGroups) == 0 && len(o.builderState.rGroups) == 0 {
+				o.state = mjDone
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
+			}
+			o.state = mjBuildFromBatch
+
 		case mjProbe:
 			o.probe()
-			o.setBuilderSourceToBatch()
-			o.state = mjBuild
-		case mjBuild:
-			o.build()
-
-			if o.builderState.outFinished {
+			o.builderState.lGroups, o.builderState.rGroups = o.groups.getGroups()
+			if len(o.builderState.lGroups) > 0 || len(o.builderState.rGroups) > 0 {
+				o.state = mjBuildFromBatch
+			} else if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
 				o.state = mjEntry
-				o.builderState.outFinished = false
 			}
 
-			if o.outputReady || o.builderState.outCount == o.output.Capacity() {
-				if o.builderState.outCount == 0 {
-					// We have already fully emitted the result of the join, so we
-					// transition to "finished" state.
-					o.state = mjDone
-					continue
-				}
+		case mjBuildFromBatch:
+			o.buildFromBatch()
+			if o.builderState.outCount == o.outputCapacity {
 				o.output.SetLength(o.builderState.outCount)
-				// Reset builder out count.
-				o.builderState.outCount = 0
-				o.outputReady = false
 				return o.output
 			}
-		case mjDone:
-			// Note that resetting of buffered group will close disk queues
-			// (if there are any).
-			if o.bufferedGroup.needToReset {
+			if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
+				o.state = mjEntry
+			}
+
+		case mjBuildFromBufferedGroup:
+			bufferedGroupComplete := o.buildFromBufferedGroup()
+			if bufferedGroupComplete {
 				o.bufferedGroup.helper.Reset(o.Ctx)
-				o.bufferedGroup.needToReset = false
+				o.state = mjEntry
 			}
+			if o.builderState.outCount == o.outputCapacity {
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
+			}
+
+		case mjDone:
 			return coldata.ZeroBatch
+
 		default:
 			colexecerror.InternalError(errors.AssertionFailedf("unexpected merge joiner state in Next: %v", o.state))
 		}
diff --git a/pkg/sql/colexec/colexecjoin/mergejoiner_leftsemi.eg.go b/pkg/sql/colexec/colexecjoin/mergejoiner_leftsemi.eg.go
index 1460f19892e7..ddab25e50f6c 100644
--- a/pkg/sql/colexec/colexecjoin/mergejoiner_leftsemi.eg.go
+++ b/pkg/sql/colexec/colexecjoin/mergejoiner_leftsemi.eg.go
@@ -48,13 +48,13 @@ var _ colexecop.Operator = &mergeJoinLeftSemiOp{}
 func (o *mergeJoinLeftSemiOp) probeBodyLSeltrueRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -77,21 +77,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -189,19 +191,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -220,10 +222,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -245,21 +252,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -333,19 +342,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -364,10 +373,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -389,21 +403,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -477,19 +493,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -508,10 +524,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -532,21 +553,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -653,19 +676,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -684,10 +707,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -705,21 +733,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -826,19 +856,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -857,10 +887,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -879,21 +914,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1000,19 +1037,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -1031,10 +1068,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -1056,21 +1098,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1201,19 +1245,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -1232,10 +1276,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -1257,21 +1306,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1366,19 +1417,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -1397,10 +1448,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -1422,21 +1478,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1510,19 +1568,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -1541,10 +1599,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -1566,21 +1629,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1672,19 +1737,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -1703,10 +1768,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -1728,21 +1798,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1822,19 +1894,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -1853,10 +1925,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -1871,13 +1948,13 @@ EqLoop:
 func (o *mergeJoinLeftSemiOp) probeBodyLSeltrueRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -1900,21 +1977,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2012,19 +2091,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -2043,10 +2122,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -2068,21 +2152,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2156,19 +2242,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -2187,10 +2273,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -2212,21 +2303,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2300,19 +2393,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -2331,10 +2424,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -2355,21 +2453,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2476,19 +2576,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -2507,10 +2607,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -2528,21 +2633,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2649,19 +2756,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -2680,10 +2787,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -2702,21 +2814,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2823,19 +2937,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -2854,10 +2968,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -2879,21 +2998,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3024,19 +3145,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -3055,10 +3176,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -3080,21 +3206,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3189,19 +3317,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -3220,10 +3348,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -3245,21 +3378,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3333,19 +3468,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -3364,10 +3499,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -3389,21 +3529,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3495,19 +3637,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -3526,10 +3668,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -3551,21 +3698,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3645,19 +3794,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -3676,10 +3825,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -3694,13 +3848,13 @@ EqLoop:
 func (o *mergeJoinLeftSemiOp) probeBodyLSelfalseRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -3723,21 +3877,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3835,19 +3991,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -3866,10 +4022,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -3891,21 +4052,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3979,19 +4142,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -4010,10 +4173,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -4035,21 +4203,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4123,19 +4293,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -4154,10 +4324,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -4178,21 +4353,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4299,19 +4476,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -4330,10 +4507,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -4351,21 +4533,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4472,19 +4656,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -4503,10 +4687,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -4525,21 +4714,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4646,19 +4837,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -4677,10 +4868,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -4702,21 +4898,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4847,19 +5045,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -4878,10 +5076,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -4903,21 +5106,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5012,19 +5217,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -5043,10 +5248,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -5068,21 +5278,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5156,19 +5368,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -5187,10 +5399,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -5212,21 +5429,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5318,19 +5537,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -5349,10 +5568,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -5374,21 +5598,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5468,19 +5694,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -5499,10 +5725,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -5517,13 +5748,13 @@ EqLoop:
 func (o *mergeJoinLeftSemiOp) probeBodyLSelfalseRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -5546,21 +5777,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5658,19 +5891,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -5689,10 +5922,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -5714,21 +5952,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5802,19 +6042,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -5833,10 +6073,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -5858,21 +6103,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5946,19 +6193,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -5977,10 +6224,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -6001,21 +6253,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6122,19 +6376,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -6153,10 +6407,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -6174,21 +6433,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6295,19 +6556,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -6326,10 +6587,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -6348,21 +6614,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6469,19 +6737,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -6500,10 +6768,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -6525,21 +6798,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6670,19 +6945,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -6701,10 +6976,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -6726,21 +7006,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6835,19 +7117,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -6866,10 +7148,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -6891,21 +7178,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6979,19 +7268,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -7010,10 +7299,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -7035,21 +7329,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7141,19 +7437,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -7172,10 +7468,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -7197,21 +7498,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7291,19 +7594,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								leftSemiGroupLength := lGroupLength
@@ -7322,10 +7625,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -7366,7 +7674,6 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[:len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		LeftColLoop:
 			for colIdx := range input.sourceTypes {
@@ -7411,8 +7718,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7473,8 +7780,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7535,8 +7842,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7596,8 +7903,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7654,8 +7961,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7713,8 +8020,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7775,8 +8082,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7837,8 +8144,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7899,8 +8206,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7961,8 +8268,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8023,8 +8330,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8094,8 +8401,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8155,8 +8462,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8216,8 +8523,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8276,8 +8583,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8333,8 +8640,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8391,8 +8698,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8452,8 +8759,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8513,8 +8820,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8574,8 +8881,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8635,8 +8942,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8696,8 +9003,8 @@ func (o *mergeJoinLeftSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8768,7 +9075,6 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[colOffset:colOffset+len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		RightColLoop:
 			for colIdx := range input.sourceTypes {
@@ -8804,8 +9110,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8871,8 +9177,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8938,8 +9244,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9004,8 +9310,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9067,8 +9373,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9131,8 +9437,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9198,8 +9504,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9265,8 +9571,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9332,8 +9638,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9399,8 +9705,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9466,8 +9772,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9543,8 +9849,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9610,8 +9916,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9677,8 +9983,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9743,8 +10049,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9806,8 +10112,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9870,8 +10176,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9937,8 +10243,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10004,8 +10310,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10071,8 +10377,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10138,8 +10444,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10205,8 +10511,8 @@ func (o *mergeJoinLeftSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10290,20 +10596,6 @@ func (o *mergeJoinLeftSemiOp) probe() {
 	}
 }
 
-// setBuilderSourceToBufferedGroup sets up the builder state to use the
-// buffered group.
-func (o *mergeJoinLeftSemiOp) setBuilderSourceToBufferedGroup() {
-	o.builderState.buildFrom = mjBuildFromBufferedGroup
-	o.bufferedGroup.helper.setupBuilder()
-	o.builderState.totalOutCountFromBufferedGroup = o.bufferedGroup.helper.calculateOutputCount()
-	o.builderState.alreadyEmittedFromBufferedGroup = 0
-
-	// We cannot yet reset the buffered groups because the builder will be taking
-	// input from them. The actual reset will take place on the next call to
-	// initProberState().
-	o.bufferedGroup.needToReset = true
-}
-
 // exhaustLeftSource sets up the builder to process any remaining tuples from
 // the left source. It should only be called when the right source has been
 // exhausted.
@@ -10318,60 +10610,126 @@ func (o *mergeJoinLeftSemiOp) exhaustRightSource() {
 	// ignored in all joins except for RIGHT OUTER and FULL OUTER.
 }
 
-// calculateOutputCount uses the toBuild field of each group and the output
-// batch size to determine the output count. Note that as soon as a group is
+// numBuiltFromBatch uses the toBuild field of each group and the output
+// capacity to determine the output count. Note that as soon as a group is
 // materialized partially or fully to output, its toBuild field is updated
-// accordingly.
-func (o *mergeJoinLeftSemiOp) calculateOutputCount(groups []group) int {
-	count := o.builderState.outCount
-	outputCapacity := o.output.Capacity()
-
+// accordingly. The number of tuples that will be built from batch during the
+// current iteration is returned.
+func (o *mergeJoinLeftSemiOp) numBuiltFromBatch(groups []group) (numBuilt int) {
+	outCount := o.builderState.outCount
 	for i := 0; i < len(groups); i++ {
-		count += groups[i].toBuild
+		outCount += groups[i].toBuild
 		groups[i].toBuild = 0
-		if count > o.output.Capacity() {
-			groups[i].toBuild = count - outputCapacity
-			count = outputCapacity
-			return count
+		if outCount > o.outputCapacity {
+			groups[i].toBuild = outCount - o.outputCapacity
+			return o.outputCapacity - o.builderState.outCount
 		}
 	}
-	o.builderState.outFinished = true
-	return count
+	return outCount - o.builderState.outCount
 }
 
-// build creates the cross product, and writes it to the output member.
-func (o *mergeJoinLeftSemiOp) build() {
+// buildFromBatch builds as many output rows as possible from the groups that
+// were complete in the probing batches. New rows are put starting at
+// o.builderState.outCount position until either the capacity is reached or all
+// groups are processed.
+func (o *mergeJoinLeftSemiOp) buildFromBatch() {
 	outStartIdx := o.builderState.outCount
-	switch o.builderState.buildFrom {
-	case mjBuildFromBatch:
-		o.builderState.outCount = o.calculateOutputCount(o.builderState.lGroups)
-		if o.output.Width() != 0 && o.builderState.outCount > outStartIdx {
-			// We will be actually building the output if we have columns in the output
-			// batch (meaning that we're not doing query like 'SELECT count(*) ...')
-			// and when builderState.outCount has increased (meaning that we have
-			// something to build).
-			colOffsetForRightGroups := 0
-			o.buildLeftGroupsFromBatch(o.builderState.lGroups, &o.left, o.proberState.lBatch, outStartIdx)
-			colOffsetForRightGroups = len(o.left.sourceTypes)
-			_ = colOffsetForRightGroups
+	numBuilt := o.numBuiltFromBatch(o.builderState.lGroups)
+	o.builderState.outCount += numBuilt
+	if numBuilt > 0 && len(o.outputTypes) != 0 {
+		// We will be actually building the output if we have columns in the output
+		// batch (meaning that we're not doing query like 'SELECT count(*) ...')
+		// and when builderState.outCount has increased (meaning that we have
+		// something to build).
+		colOffsetForRightGroups := 0
+		o.buildLeftGroupsFromBatch(o.builderState.lGroups, &o.left, o.proberState.lBatch, outStartIdx)
+		colOffsetForRightGroups = len(o.left.sourceTypes)
+		_ = colOffsetForRightGroups
+	}
+}
+
+// transitionIntoBuildingFromBufferedGroup should be called once we have
+// non-empty right buffered group in order to setup the buffered group builder.
+// It will complete the right buffered group (meaning it'll read all batches
+// from the right input until either the new group is found or the input is
+// exhausted).
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinLeftSemiOp) transitionIntoBuildingFromBufferedGroup() {
+	if o.proberState.rIdx == o.proberState.rLength {
+		// The right buffered group might extend into the next batch, so we have
+		// to complete it first.
+		o.completeRightBufferedGroup()
+	}
+
+	o.bufferedGroup.helper.setupLeftBuilder()
+
+	startIdx := o.bufferedGroup.leftGroupStartIdx
+
+	o.bufferedGroup.helper.prepareForNextLeftBatch(o.proberState.lBatch, startIdx, o.proberState.lIdx)
+	o.state = mjBuildFromBufferedGroup
+}
+
+// buildFromBufferedGroup builds the output based on the current buffered group
+// and puts new tuples starting at position b.builderState.outCount. It returns
+// true once the output for the buffered group has been fully populated.
+// It is assumed that transitionIntoBuildingFromBufferedGroup has been called.
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinLeftSemiOp) buildFromBufferedGroup() (bufferedGroupComplete bool) {
+	bg := &o.bufferedGroup
+	// Iterate until either we use up the whole capacity of the output batch or
+	// we complete the buffered group.
+	for {
+		if bg.helper.builderState.left.curSrcStartIdx == o.proberState.lLength {
+			// The output has been fully built from the current left batch.
+			bg.leftBatchDone = true
+		}
+		if bg.leftBatchDone {
+			// The current left batch has been fully processed with regards to
+			// the buffered group.
+			bg.leftBatchDone = false
+			if o.proberState.lIdx < o.proberState.lLength {
+				// The group on the left is finished within the current left
+				// batch.
+				return true
+			}
+			var skipLeftBufferedGroup bool
+			if skipLeftBufferedGroup {
+				// Keep fetching the next batch from the left input until we
+				// either find the start of the new group or we exhaust the
+				// input.
+				for o.proberState.lIdx == o.proberState.lLength && o.proberState.lLength > 0 {
+					o.continueLeftBufferedGroup()
+				}
+				return true
+			}
+			// Fetch the next batch from the left input and calculate the
+			// boundaries of the buffered group.
+			o.continueLeftBufferedGroup()
+			if o.proberState.lIdx == 0 {
+				return true
+			}
+			bg.helper.prepareForNextLeftBatch(
+				o.proberState.lBatch, bg.leftGroupStartIdx, o.proberState.lIdx,
+			)
 		}
 
-	case mjBuildFromBufferedGroup:
-		willEmit := o.builderState.totalOutCountFromBufferedGroup - o.builderState.alreadyEmittedFromBufferedGroup
-		if o.builderState.outCount+willEmit > o.output.Capacity() {
-			willEmit = o.output.Capacity() - o.builderState.outCount
+		willEmit := bg.helper.canEmit()
+		if o.builderState.outCount+willEmit > o.outputCapacity {
+			willEmit = o.outputCapacity - o.builderState.outCount
 		} else {
-			o.builderState.outFinished = true
+			bg.leftBatchDone = true
+		}
+		if willEmit > 0 && len(o.outputTypes) != 0 {
+			bg.helper.buildFromLeftInput(o.Ctx, o.builderState.outCount)
 		}
 		o.builderState.outCount += willEmit
-		o.builderState.alreadyEmittedFromBufferedGroup += willEmit
-		if o.output.Width() != 0 && willEmit > 0 {
-			o.bufferedGroup.helper.buildFromLeftInput(o.Ctx, outStartIdx)
+		bg.helper.builderState.numEmittedCurLeftBatch += willEmit
+		bg.helper.builderState.numEmittedTotal += willEmit
+		if o.builderState.outCount == o.outputCapacity {
+			return false
 		}
-
-	default:
-		colexecerror.InternalError(errors.AssertionFailedf("unsupported mjBuildFrom %d", o.builderState.buildFrom))
-
 	}
 }
 
@@ -10379,65 +10737,75 @@ func (o *mergeJoinLeftSemiOp) Next() coldata.Batch {
 	o.output, _ = o.unlimitedAllocator.ResetMaybeReallocate(
 		o.outputTypes, o.output, 1 /* minDesiredCapacity */, o.memoryLimit,
 	)
+	o.outputCapacity = o.output.Capacity()
 	o.bufferedGroup.helper.output = o.output
+	o.builderState.outCount = 0
 	for {
 		switch o.state {
 		case mjEntry:
-			o.initProberState()
-
-			if o.nonEmptyBufferedGroup() {
-				o.state = mjFinishBufferedGroup
-				break
+			// If this is the first batch or we're done with the current batch,
+			// get the next batch.
+			if o.proberState.lBatch == nil || (o.proberState.lLength != 0 && o.proberState.lIdx == o.proberState.lLength) {
+				o.proberState.lIdx, o.proberState.lBatch = 0, o.left.source.Next()
+				o.proberState.lLength = o.proberState.lBatch.Length()
+			}
+			if o.proberState.rBatch == nil || (o.proberState.rLength != 0 && o.proberState.rIdx == o.proberState.rLength) {
+				o.proberState.rIdx, o.proberState.rBatch = 0, o.right.source.Next()
+				o.proberState.rLength = o.proberState.rBatch.Length()
 			}
-
 			if o.sourceFinished() {
 				o.state = mjSourceFinished
 				break
 			}
-
 			o.state = mjProbe
+
 		case mjSourceFinished:
-			o.outputReady = true
-			o.builderState.buildFrom = mjBuildFromBatch
-			o.setBuilderSourceToBufferedGroup()
-			o.state = mjBuild
-		case mjFinishBufferedGroup:
-			o.finishProbe()
-			o.setBuilderSourceToBufferedGroup()
-			o.state = mjBuild
+			o.builderState.lGroups = o.builderState.lGroups[:0]
+			o.builderState.rGroups = o.builderState.rGroups[:0]
+			if len(o.builderState.lGroups) == 0 && len(o.builderState.rGroups) == 0 {
+				o.state = mjDone
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
+			}
+			o.state = mjBuildFromBatch
+
 		case mjProbe:
 			o.probe()
-			o.setBuilderSourceToBatch()
-			o.state = mjBuild
-		case mjBuild:
-			o.build()
-
-			if o.builderState.outFinished {
+			o.builderState.lGroups, o.builderState.rGroups = o.groups.getGroups()
+			if len(o.builderState.lGroups) > 0 || len(o.builderState.rGroups) > 0 {
+				o.state = mjBuildFromBatch
+			} else if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
 				o.state = mjEntry
-				o.builderState.outFinished = false
 			}
 
-			if o.outputReady || o.builderState.outCount == o.output.Capacity() {
-				if o.builderState.outCount == 0 {
-					// We have already fully emitted the result of the join, so we
-					// transition to "finished" state.
-					o.state = mjDone
-					continue
-				}
+		case mjBuildFromBatch:
+			o.buildFromBatch()
+			if o.builderState.outCount == o.outputCapacity {
 				o.output.SetLength(o.builderState.outCount)
-				// Reset builder out count.
-				o.builderState.outCount = 0
-				o.outputReady = false
 				return o.output
 			}
-		case mjDone:
-			// Note that resetting of buffered group will close disk queues
-			// (if there are any).
-			if o.bufferedGroup.needToReset {
+			if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
+				o.state = mjEntry
+			}
+
+		case mjBuildFromBufferedGroup:
+			bufferedGroupComplete := o.buildFromBufferedGroup()
+			if bufferedGroupComplete {
 				o.bufferedGroup.helper.Reset(o.Ctx)
-				o.bufferedGroup.needToReset = false
+				o.state = mjEntry
 			}
+			if o.builderState.outCount == o.outputCapacity {
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
+			}
+
+		case mjDone:
 			return coldata.ZeroBatch
+
 		default:
 			colexecerror.InternalError(errors.AssertionFailedf("unexpected merge joiner state in Next: %v", o.state))
 		}
diff --git a/pkg/sql/colexec/colexecjoin/mergejoiner_rightanti.eg.go b/pkg/sql/colexec/colexecjoin/mergejoiner_rightanti.eg.go
index b4509265bfd2..d782f4429bf2 100644
--- a/pkg/sql/colexec/colexecjoin/mergejoiner_rightanti.eg.go
+++ b/pkg/sql/colexec/colexecjoin/mergejoiner_rightanti.eg.go
@@ -48,13 +48,13 @@ var _ colexecop.Operator = &mergeJoinRightAntiOp{}
 func (o *mergeJoinRightAntiOp) probeBodyLSeltrueRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -77,32 +77,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -200,19 +201,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -261,7 +262,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -271,10 +272,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -296,32 +302,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -395,19 +402,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -448,7 +455,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -458,10 +465,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -483,32 +495,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -582,19 +595,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -635,7 +648,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -645,10 +658,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -669,32 +687,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -801,19 +820,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -865,7 +884,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -875,10 +894,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -896,32 +920,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1028,19 +1053,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -1092,7 +1117,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -1102,10 +1127,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -1124,32 +1154,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1256,19 +1287,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -1320,7 +1351,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -1330,10 +1361,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -1355,32 +1391,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1511,19 +1548,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -1583,7 +1620,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -1593,10 +1630,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -1618,32 +1660,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1738,19 +1781,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -1798,7 +1841,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -1808,10 +1851,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -1833,32 +1881,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1932,19 +1981,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -1985,7 +2034,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -1995,10 +2044,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -2020,32 +2074,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2137,19 +2192,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -2196,7 +2251,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -2206,10 +2261,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -2231,32 +2291,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2336,19 +2397,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -2391,7 +2452,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -2401,10 +2462,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -2419,13 +2485,13 @@ EqLoop:
 func (o *mergeJoinRightAntiOp) probeBodyLSeltrueRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -2448,32 +2514,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2571,19 +2638,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -2632,7 +2699,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -2642,10 +2709,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -2667,32 +2739,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2766,19 +2839,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -2819,7 +2892,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -2829,10 +2902,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -2854,32 +2932,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2953,19 +3032,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -3006,7 +3085,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -3016,10 +3095,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -3040,32 +3124,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3172,19 +3257,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -3236,7 +3321,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -3246,10 +3331,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -3267,32 +3357,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3399,19 +3490,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -3463,7 +3554,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -3473,10 +3564,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -3495,32 +3591,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3627,19 +3724,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -3691,7 +3788,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -3701,10 +3798,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -3726,32 +3828,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3882,19 +3985,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -3954,7 +4057,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -3964,10 +4067,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -3989,32 +4097,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4109,19 +4218,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -4169,7 +4278,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -4179,10 +4288,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -4204,32 +4318,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4303,19 +4418,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -4356,7 +4471,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -4366,10 +4481,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -4391,32 +4511,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4508,19 +4629,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -4567,7 +4688,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -4577,10 +4698,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -4602,32 +4728,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4707,19 +4834,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -4762,7 +4889,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -4772,10 +4899,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -4790,13 +4922,13 @@ EqLoop:
 func (o *mergeJoinRightAntiOp) probeBodyLSelfalseRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -4819,32 +4951,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4942,19 +5075,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -5003,7 +5136,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -5013,10 +5146,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -5038,32 +5176,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5137,19 +5276,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -5190,7 +5329,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -5200,10 +5339,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -5225,32 +5369,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5324,19 +5469,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -5377,7 +5522,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -5387,10 +5532,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -5411,32 +5561,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5543,19 +5694,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -5607,7 +5758,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -5617,10 +5768,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -5638,32 +5794,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5770,19 +5927,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -5834,7 +5991,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -5844,10 +6001,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -5866,32 +6028,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5998,19 +6161,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -6062,7 +6225,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -6072,10 +6235,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -6097,32 +6265,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6253,19 +6422,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -6325,7 +6494,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -6335,10 +6504,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -6360,32 +6534,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6480,19 +6655,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -6540,7 +6715,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -6550,10 +6725,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -6575,32 +6755,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6674,19 +6855,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -6727,7 +6908,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -6737,10 +6918,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -6762,32 +6948,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6879,19 +7066,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -6938,7 +7125,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -6948,10 +7135,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -6973,32 +7165,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7078,19 +7271,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -7133,7 +7326,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -7143,10 +7336,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -7161,13 +7359,13 @@ EqLoop:
 func (o *mergeJoinRightAntiOp) probeBodyLSelfalseRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -7190,32 +7388,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7313,19 +7512,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -7374,7 +7573,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -7384,10 +7583,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -7409,32 +7613,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7508,19 +7713,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -7561,7 +7766,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -7571,10 +7776,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -7596,32 +7806,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7695,19 +7906,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -7748,7 +7959,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -7758,10 +7969,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -7782,32 +7998,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7914,19 +8131,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -7978,7 +8195,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -7988,10 +8205,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -8009,32 +8231,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8141,19 +8364,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -8205,7 +8428,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -8215,10 +8438,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -8237,32 +8465,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8369,19 +8598,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -8433,7 +8662,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -8443,10 +8672,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -8468,32 +8702,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8624,19 +8859,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -8696,7 +8931,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -8706,10 +8941,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -8731,32 +8971,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8851,19 +9092,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -8911,7 +9152,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -8921,10 +9162,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -8946,32 +9192,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -9045,19 +9292,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -9098,7 +9345,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -9108,10 +9355,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -9133,32 +9385,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -9250,19 +9503,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -9309,7 +9562,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -9319,10 +9572,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -9344,32 +9602,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -9449,19 +9708,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// With RIGHT ANTI join, we are only interested in unmatched tuples
@@ -9504,7 +9763,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -9514,10 +9773,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -9558,7 +9822,6 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[:len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		LeftColLoop:
 			for colIdx := range input.sourceTypes {
@@ -9603,8 +9866,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -9668,8 +9931,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -9733,8 +9996,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -9797,8 +10060,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -9858,8 +10121,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -9920,8 +10183,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -9985,8 +10248,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10050,8 +10313,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10115,8 +10378,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10180,8 +10443,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10245,8 +10508,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10319,8 +10582,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10383,8 +10646,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10447,8 +10710,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10510,8 +10773,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10570,8 +10833,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10631,8 +10894,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10695,8 +10958,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10759,8 +11022,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10823,8 +11086,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10887,8 +11150,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10951,8 +11214,8 @@ func (o *mergeJoinRightAntiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -11026,7 +11289,6 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[colOffset:colOffset+len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		RightColLoop:
 			for colIdx := range input.sourceTypes {
@@ -11062,8 +11324,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11129,8 +11391,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11196,8 +11458,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11262,8 +11524,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11325,8 +11587,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11389,8 +11651,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11456,8 +11718,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11523,8 +11785,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11590,8 +11852,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11657,8 +11919,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11724,8 +11986,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11801,8 +12063,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11868,8 +12130,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11935,8 +12197,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12001,8 +12263,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12064,8 +12326,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12128,8 +12390,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12195,8 +12457,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12262,8 +12524,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12329,8 +12591,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12396,8 +12658,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12463,8 +12725,8 @@ func (o *mergeJoinRightAntiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12548,20 +12810,6 @@ func (o *mergeJoinRightAntiOp) probe() {
 	}
 }
 
-// setBuilderSourceToBufferedGroup sets up the builder state to use the
-// buffered group.
-func (o *mergeJoinRightAntiOp) setBuilderSourceToBufferedGroup() {
-	o.builderState.buildFrom = mjBuildFromBufferedGroup
-	o.bufferedGroup.helper.setupBuilder()
-	o.builderState.totalOutCountFromBufferedGroup = o.bufferedGroup.helper.calculateOutputCount()
-	o.builderState.alreadyEmittedFromBufferedGroup = 0
-
-	// We cannot yet reset the buffered groups because the builder will be taking
-	// input from them. The actual reset will take place on the next call to
-	// initProberState().
-	o.bufferedGroup.needToReset = true
-}
-
 // exhaustLeftSource sets up the builder to process any remaining tuples from
 // the left source. It should only be called when the right source has been
 // exhausted.
@@ -12586,14 +12834,13 @@ func (o *mergeJoinRightAntiOp) exhaustRightSource() {
 	o.proberState.rIdx = o.proberState.rLength
 }
 
-// calculateOutputCount uses the toBuild field of each group and the output
-// batch size to determine the output count. Note that as soon as a group is
+// numBuiltFromBatch uses the toBuild field of each group and the output
+// capacity to determine the output count. Note that as soon as a group is
 // materialized partially or fully to output, its toBuild field is updated
-// accordingly.
-func (o *mergeJoinRightAntiOp) calculateOutputCount(groups []group) int {
-	count := o.builderState.outCount
-	outputCapacity := o.output.Capacity()
-
+// accordingly. The number of tuples that will be built from batch during the
+// current iteration is returned.
+func (o *mergeJoinRightAntiOp) numBuiltFromBatch(groups []group) (numBuilt int) {
+	outCount := o.builderState.outCount
 	for i := 0; i < len(groups); i++ {
 		if !groups[i].unmatched {
 			// "Matched" groups are not outputted in LEFT ANTI, RIGHT ANTI,
@@ -12601,49 +12848,116 @@ func (o *mergeJoinRightAntiOp) calculateOutputCount(groups []group) int {
 			// they do not contribute to the output count.
 			continue
 		}
-		count += groups[i].toBuild
+		outCount += groups[i].toBuild
 		groups[i].toBuild = 0
-		if count > o.output.Capacity() {
-			groups[i].toBuild = count - outputCapacity
-			count = outputCapacity
-			return count
+		if outCount > o.outputCapacity {
+			groups[i].toBuild = outCount - o.outputCapacity
+			return o.outputCapacity - o.builderState.outCount
 		}
 	}
-	o.builderState.outFinished = true
-	return count
+	return outCount - o.builderState.outCount
 }
 
-// build creates the cross product, and writes it to the output member.
-func (o *mergeJoinRightAntiOp) build() {
+// buildFromBatch builds as many output rows as possible from the groups that
+// were complete in the probing batches. New rows are put starting at
+// o.builderState.outCount position until either the capacity is reached or all
+// groups are processed.
+func (o *mergeJoinRightAntiOp) buildFromBatch() {
 	outStartIdx := o.builderState.outCount
-	switch o.builderState.buildFrom {
-	case mjBuildFromBatch:
-		o.builderState.outCount = o.calculateOutputCount(o.builderState.rGroups)
-		if o.output.Width() != 0 && o.builderState.outCount > outStartIdx {
-			// We will be actually building the output if we have columns in the output
-			// batch (meaning that we're not doing query like 'SELECT count(*) ...')
-			// and when builderState.outCount has increased (meaning that we have
-			// something to build).
-			colOffsetForRightGroups := 0
-			o.buildRightGroupsFromBatch(o.builderState.rGroups, colOffsetForRightGroups, &o.right, o.proberState.rBatch, outStartIdx)
+	numBuilt := o.numBuiltFromBatch(o.builderState.rGroups)
+	o.builderState.outCount += numBuilt
+	if numBuilt > 0 && len(o.outputTypes) != 0 {
+		// We will be actually building the output if we have columns in the output
+		// batch (meaning that we're not doing query like 'SELECT count(*) ...')
+		// and when builderState.outCount has increased (meaning that we have
+		// something to build).
+		colOffsetForRightGroups := 0
+		o.buildRightGroupsFromBatch(o.builderState.rGroups, colOffsetForRightGroups, &o.right, o.proberState.rBatch, outStartIdx)
+	}
+}
+
+// transitionIntoBuildingFromBufferedGroup should be called once we have
+// non-empty right buffered group in order to setup the buffered group builder.
+// It will complete the right buffered group (meaning it'll read all batches
+// from the right input until either the new group is found or the input is
+// exhausted).
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinRightAntiOp) transitionIntoBuildingFromBufferedGroup() {
+	if o.proberState.rIdx == o.proberState.rLength {
+		// The right buffered group might extend into the next batch, so we have
+		// to complete it first.
+		o.completeRightBufferedGroup()
+	}
+
+	o.bufferedGroup.helper.setupLeftBuilder()
+
+	startIdx := o.bufferedGroup.leftGroupStartIdx
+
+	o.bufferedGroup.helper.prepareForNextLeftBatch(o.proberState.lBatch, startIdx, o.proberState.lIdx)
+	o.state = mjBuildFromBufferedGroup
+}
+
+// buildFromBufferedGroup builds the output based on the current buffered group
+// and puts new tuples starting at position b.builderState.outCount. It returns
+// true once the output for the buffered group has been fully populated.
+// It is assumed that transitionIntoBuildingFromBufferedGroup has been called.
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinRightAntiOp) buildFromBufferedGroup() (bufferedGroupComplete bool) {
+	bg := &o.bufferedGroup
+	// Iterate until either we use up the whole capacity of the output batch or
+	// we complete the buffered group.
+	for {
+		if bg.helper.builderState.left.curSrcStartIdx == o.proberState.lLength {
+			// The output has been fully built from the current left batch.
+			bg.leftBatchDone = true
+		}
+		if bg.leftBatchDone {
+			// The current left batch has been fully processed with regards to
+			// the buffered group.
+			bg.leftBatchDone = false
+			if o.proberState.lIdx < o.proberState.lLength {
+				// The group on the left is finished within the current left
+				// batch.
+				return true
+			}
+			var skipLeftBufferedGroup bool
+			if skipLeftBufferedGroup {
+				// Keep fetching the next batch from the left input until we
+				// either find the start of the new group or we exhaust the
+				// input.
+				for o.proberState.lIdx == o.proberState.lLength && o.proberState.lLength > 0 {
+					o.continueLeftBufferedGroup()
+				}
+				return true
+			}
+			// Fetch the next batch from the left input and calculate the
+			// boundaries of the buffered group.
+			o.continueLeftBufferedGroup()
+			if o.proberState.lIdx == 0 {
+				return true
+			}
+			bg.helper.prepareForNextLeftBatch(
+				o.proberState.lBatch, bg.leftGroupStartIdx, o.proberState.lIdx,
+			)
 		}
 
-	case mjBuildFromBufferedGroup:
-		willEmit := o.builderState.totalOutCountFromBufferedGroup - o.builderState.alreadyEmittedFromBufferedGroup
-		if o.builderState.outCount+willEmit > o.output.Capacity() {
-			willEmit = o.output.Capacity() - o.builderState.outCount
+		willEmit := bg.helper.canEmit()
+		if o.builderState.outCount+willEmit > o.outputCapacity {
+			willEmit = o.outputCapacity - o.builderState.outCount
 		} else {
-			o.builderState.outFinished = true
+			bg.leftBatchDone = true
+		}
+		if willEmit > 0 && len(o.outputTypes) != 0 {
+			bg.helper.buildFromRightInput(o.Ctx, o.builderState.outCount)
 		}
 		o.builderState.outCount += willEmit
-		o.builderState.alreadyEmittedFromBufferedGroup += willEmit
-		if o.output.Width() != 0 && willEmit > 0 {
-			o.bufferedGroup.helper.buildFromRightInput(o.Ctx, outStartIdx)
+		bg.helper.builderState.numEmittedCurLeftBatch += willEmit
+		bg.helper.builderState.numEmittedTotal += willEmit
+		if o.builderState.outCount == o.outputCapacity {
+			return false
 		}
-
-	default:
-		colexecerror.InternalError(errors.AssertionFailedf("unsupported mjBuildFrom %d", o.builderState.buildFrom))
-
 	}
 }
 
@@ -12651,30 +12965,29 @@ func (o *mergeJoinRightAntiOp) Next() coldata.Batch {
 	o.output, _ = o.unlimitedAllocator.ResetMaybeReallocate(
 		o.outputTypes, o.output, 1 /* minDesiredCapacity */, o.memoryLimit,
 	)
+	o.outputCapacity = o.output.Capacity()
 	o.bufferedGroup.helper.output = o.output
+	o.builderState.outCount = 0
 	for {
 		switch o.state {
 		case mjEntry:
-			o.initProberState()
-
-			if o.nonEmptyBufferedGroup() {
-				o.state = mjFinishBufferedGroup
-				break
+			// If this is the first batch or we're done with the current batch,
+			// get the next batch.
+			if o.proberState.lBatch == nil || (o.proberState.lLength != 0 && o.proberState.lIdx == o.proberState.lLength) {
+				o.proberState.lIdx, o.proberState.lBatch = 0, o.left.source.Next()
+				o.proberState.lLength = o.proberState.lBatch.Length()
+			}
+			if o.proberState.rBatch == nil || (o.proberState.rLength != 0 && o.proberState.rIdx == o.proberState.rLength) {
+				o.proberState.rIdx, o.proberState.rBatch = 0, o.right.source.Next()
+				o.proberState.rLength = o.proberState.rBatch.Length()
 			}
-
 			if o.sourceFinished() {
 				o.state = mjSourceFinished
 				break
 			}
-
 			o.state = mjProbe
+
 		case mjSourceFinished:
-			o.outputReady = true
-			o.builderState.buildFrom = mjBuildFromBatch
-			// Next, we need to make sure that builder state is set up for a case when
-			// neither exhaustLeftSource nor exhaustRightSource is called below. In such
-			// scenario the merge joiner is done, so it'll be outputting zero-length
-			// batches from now on.
 			o.builderState.lGroups = o.builderState.lGroups[:0]
 			o.builderState.rGroups = o.builderState.rGroups[:0]
 			// At least one of the sources is finished. If it was the left one,
@@ -12683,49 +12996,51 @@ func (o *mergeJoinRightAntiOp) Next() coldata.Batch {
 			// finished, then there is nothing left to do.
 			if o.proberState.rIdx < o.proberState.rLength {
 				o.exhaustRightSource()
-				// We unset o.outputReady here because we want to put as many unmatched
-				// tuples from the right into the output batch. Once outCount reaches the
-				// desired output batch size, the output will be returned.
-				o.outputReady = false
 			}
-			o.state = mjBuild
-		case mjFinishBufferedGroup:
-			o.finishProbe()
-			o.setBuilderSourceToBufferedGroup()
-			o.state = mjBuild
+			if len(o.builderState.lGroups) == 0 && len(o.builderState.rGroups) == 0 {
+				o.state = mjDone
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
+			}
+			o.state = mjBuildFromBatch
+
 		case mjProbe:
 			o.probe()
-			o.setBuilderSourceToBatch()
-			o.state = mjBuild
-		case mjBuild:
-			o.build()
-
-			if o.builderState.outFinished {
+			o.builderState.lGroups, o.builderState.rGroups = o.groups.getGroups()
+			if len(o.builderState.lGroups) > 0 || len(o.builderState.rGroups) > 0 {
+				o.state = mjBuildFromBatch
+			} else if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
 				o.state = mjEntry
-				o.builderState.outFinished = false
 			}
 
-			if o.outputReady || o.builderState.outCount == o.output.Capacity() {
-				if o.builderState.outCount == 0 {
-					// We have already fully emitted the result of the join, so we
-					// transition to "finished" state.
-					o.state = mjDone
-					continue
-				}
+		case mjBuildFromBatch:
+			o.buildFromBatch()
+			if o.builderState.outCount == o.outputCapacity {
 				o.output.SetLength(o.builderState.outCount)
-				// Reset builder out count.
-				o.builderState.outCount = 0
-				o.outputReady = false
 				return o.output
 			}
-		case mjDone:
-			// Note that resetting of buffered group will close disk queues
-			// (if there are any).
-			if o.bufferedGroup.needToReset {
+			if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
+				o.state = mjEntry
+			}
+
+		case mjBuildFromBufferedGroup:
+			bufferedGroupComplete := o.buildFromBufferedGroup()
+			if bufferedGroupComplete {
 				o.bufferedGroup.helper.Reset(o.Ctx)
-				o.bufferedGroup.needToReset = false
+				o.state = mjEntry
 			}
+			if o.builderState.outCount == o.outputCapacity {
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
+			}
+
+		case mjDone:
 			return coldata.ZeroBatch
+
 		default:
 			colexecerror.InternalError(errors.AssertionFailedf("unexpected merge joiner state in Next: %v", o.state))
 		}
diff --git a/pkg/sql/colexec/colexecjoin/mergejoiner_rightouter.eg.go b/pkg/sql/colexec/colexecjoin/mergejoiner_rightouter.eg.go
index 4bc582ea5d27..bd6ae5db6595 100644
--- a/pkg/sql/colexec/colexecjoin/mergejoiner_rightouter.eg.go
+++ b/pkg/sql/colexec/colexecjoin/mergejoiner_rightouter.eg.go
@@ -48,13 +48,13 @@ var _ colexecop.Operator = &mergeJoinRightOuterOp{}
 func (o *mergeJoinRightOuterOp) probeBodyLSeltrueRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -77,32 +77,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -200,19 +201,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -262,7 +263,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -272,10 +273,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -297,32 +303,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -396,19 +403,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -450,7 +457,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -460,10 +467,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -485,32 +497,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -584,19 +597,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -638,7 +651,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -648,10 +661,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -672,32 +690,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -804,19 +823,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -869,7 +888,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -879,10 +898,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -900,32 +924,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1032,19 +1057,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -1097,7 +1122,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -1107,10 +1132,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -1129,32 +1159,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1261,19 +1292,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -1326,7 +1357,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -1336,10 +1367,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -1361,32 +1397,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1517,19 +1554,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -1590,7 +1627,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -1600,10 +1637,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -1625,32 +1667,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1745,19 +1788,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -1806,7 +1849,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -1816,10 +1859,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -1841,32 +1889,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1940,19 +1989,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -1994,7 +2043,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -2004,10 +2053,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -2029,32 +2083,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2146,19 +2201,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -2206,7 +2261,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -2216,10 +2271,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -2241,32 +2301,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2346,19 +2407,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -2402,7 +2463,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -2412,10 +2473,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -2430,13 +2496,13 @@ EqLoop:
 func (o *mergeJoinRightOuterOp) probeBodyLSeltrueRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -2459,32 +2525,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2582,19 +2649,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -2644,7 +2711,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -2654,10 +2721,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -2679,32 +2751,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2778,19 +2851,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -2832,7 +2905,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -2842,10 +2915,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -2867,32 +2945,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2966,19 +3045,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3020,7 +3099,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -3030,10 +3109,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -3054,32 +3138,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3186,19 +3271,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3251,7 +3336,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -3261,10 +3346,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -3282,32 +3372,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3414,19 +3505,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3479,7 +3570,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -3489,10 +3580,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -3511,32 +3607,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3643,19 +3740,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3708,7 +3805,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -3718,10 +3815,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -3743,32 +3845,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3899,19 +4002,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -3972,7 +4075,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -3982,10 +4085,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -4007,32 +4115,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4127,19 +4236,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -4188,7 +4297,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -4198,10 +4307,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -4223,32 +4337,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4322,19 +4437,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -4376,7 +4491,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -4386,10 +4501,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -4411,32 +4531,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4528,19 +4649,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -4588,7 +4709,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -4598,10 +4719,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -4623,32 +4749,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4728,19 +4855,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -4784,7 +4911,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -4794,10 +4921,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -4812,13 +4944,13 @@ EqLoop:
 func (o *mergeJoinRightOuterOp) probeBodyLSelfalseRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -4841,32 +4973,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4964,19 +5097,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5026,7 +5159,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -5036,10 +5169,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -5061,32 +5199,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5160,19 +5299,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5214,7 +5353,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -5224,10 +5363,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -5249,32 +5393,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5348,19 +5493,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5402,7 +5547,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -5412,10 +5557,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -5436,32 +5586,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5568,19 +5719,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5633,7 +5784,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -5643,10 +5794,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -5664,32 +5820,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5796,19 +5953,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -5861,7 +6018,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -5871,10 +6028,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -5893,32 +6055,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6025,19 +6188,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -6090,7 +6253,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -6100,10 +6263,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -6125,32 +6293,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6281,19 +6450,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -6354,7 +6523,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -6364,10 +6533,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -6389,32 +6563,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6509,19 +6684,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -6570,7 +6745,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -6580,10 +6755,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -6605,32 +6785,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6704,19 +6885,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -6758,7 +6939,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -6768,10 +6949,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -6793,32 +6979,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6910,19 +7097,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -6970,7 +7157,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -6980,10 +7167,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -7005,32 +7197,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7110,19 +7303,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -7166,7 +7359,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -7176,10 +7369,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -7194,13 +7392,13 @@ EqLoop:
 func (o *mergeJoinRightOuterOp) probeBodyLSelfalseRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -7223,32 +7421,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7346,19 +7545,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -7408,7 +7607,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -7418,10 +7617,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -7443,32 +7647,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7542,19 +7747,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -7596,7 +7801,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -7606,10 +7811,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -7631,32 +7841,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7730,19 +7941,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -7784,7 +7995,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -7794,10 +8005,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -7818,32 +8034,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7950,19 +8167,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -8015,7 +8232,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -8025,10 +8242,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -8046,32 +8268,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8178,19 +8401,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -8243,7 +8466,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -8253,10 +8476,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -8275,32 +8503,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8407,19 +8636,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -8472,7 +8701,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -8482,10 +8711,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -8507,32 +8741,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8663,19 +8898,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -8736,7 +8971,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -8746,10 +8981,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -8771,32 +9011,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -8891,19 +9132,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -8952,7 +9193,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -8962,10 +9203,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -8987,32 +9233,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -9086,19 +9333,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -9140,7 +9387,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -9150,10 +9397,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -9175,32 +9427,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -9292,19 +9545,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -9352,7 +9605,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -9362,10 +9615,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -9387,32 +9645,33 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					if rGroup.unmatched {
-						if curRIdx+1 != curREndIdx {
-							colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-						}
 						// The row already does not have a match, so we don't need to do any
 						// additional processing.
 						o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-						curRIdx++
-						areGroupsProcessed = true
+						if lastEqCol && curRIdx >= o.proberState.rIdx {
+							o.proberState.rIdx = curRIdx + 1
+						}
+						continue
 					}
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
 							o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -9492,19 +9751,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								// Neither group ends with the batch, so add the group to the
@@ -9548,7 +9807,7 @@ EqLoop:
 							}
 						}
 					}
-					if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+					if !o.groups.isLastGroupInCol() {
 						// The current group is not the last one within the column, so it cannot be
 						// extended into the next batch, and we need to process it right now. Any
 						// unprocessed row in the right group will not get a match, so each one of
@@ -9558,10 +9817,15 @@ EqLoop:
 							curRIdx++
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -9602,7 +9866,6 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[:len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		LeftColLoop:
 			for colIdx := range input.sourceTypes {
@@ -9647,8 +9910,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -9712,8 +9975,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -9777,8 +10040,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -9841,8 +10104,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -9902,8 +10165,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -9964,8 +10227,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10029,8 +10292,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10094,8 +10357,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10159,8 +10422,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10224,8 +10487,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10289,8 +10552,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10363,8 +10626,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10427,8 +10690,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10491,8 +10754,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10554,8 +10817,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10614,8 +10877,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10675,8 +10938,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10739,8 +11002,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10803,8 +11066,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10867,8 +11130,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10931,8 +11194,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -10995,8 +11258,8 @@ func (o *mergeJoinRightOuterOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									if leftGroup.nullGroup {
@@ -11070,7 +11333,6 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[colOffset:colOffset+len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		RightColLoop:
 			for colIdx := range input.sourceTypes {
@@ -11106,8 +11368,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11173,8 +11435,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11240,8 +11502,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11306,8 +11568,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11369,8 +11631,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11433,8 +11695,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11500,8 +11762,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11567,8 +11829,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11634,8 +11896,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11701,8 +11963,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11768,8 +12030,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11845,8 +12107,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11912,8 +12174,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -11979,8 +12241,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12045,8 +12307,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12108,8 +12370,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12172,8 +12434,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12239,8 +12501,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12306,8 +12568,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12373,8 +12635,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12440,8 +12702,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12507,8 +12769,8 @@ func (o *mergeJoinRightOuterOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -12592,20 +12854,6 @@ func (o *mergeJoinRightOuterOp) probe() {
 	}
 }
 
-// setBuilderSourceToBufferedGroup sets up the builder state to use the
-// buffered group.
-func (o *mergeJoinRightOuterOp) setBuilderSourceToBufferedGroup() {
-	o.builderState.buildFrom = mjBuildFromBufferedGroup
-	o.bufferedGroup.helper.setupBuilder()
-	o.builderState.totalOutCountFromBufferedGroup = o.bufferedGroup.helper.calculateOutputCount()
-	o.builderState.alreadyEmittedFromBufferedGroup = 0
-
-	// We cannot yet reset the buffered groups because the builder will be taking
-	// input from them. The actual reset will take place on the next call to
-	// initProberState().
-	o.bufferedGroup.needToReset = true
-}
-
 // exhaustLeftSource sets up the builder to process any remaining tuples from
 // the left source. It should only be called when the right source has been
 // exhausted.
@@ -12638,62 +12886,128 @@ func (o *mergeJoinRightOuterOp) exhaustRightSource() {
 	o.proberState.rIdx = o.proberState.rLength
 }
 
-// calculateOutputCount uses the toBuild field of each group and the output
-// batch size to determine the output count. Note that as soon as a group is
+// numBuiltFromBatch uses the toBuild field of each group and the output
+// capacity to determine the output count. Note that as soon as a group is
 // materialized partially or fully to output, its toBuild field is updated
-// accordingly.
-func (o *mergeJoinRightOuterOp) calculateOutputCount(groups []group) int {
-	count := o.builderState.outCount
-	outputCapacity := o.output.Capacity()
-
+// accordingly. The number of tuples that will be built from batch during the
+// current iteration is returned.
+func (o *mergeJoinRightOuterOp) numBuiltFromBatch(groups []group) (numBuilt int) {
+	outCount := o.builderState.outCount
 	for i := 0; i < len(groups); i++ {
-		count += groups[i].toBuild
+		outCount += groups[i].toBuild
 		groups[i].toBuild = 0
-		if count > o.output.Capacity() {
-			groups[i].toBuild = count - outputCapacity
-			count = outputCapacity
-			return count
+		if outCount > o.outputCapacity {
+			groups[i].toBuild = outCount - o.outputCapacity
+			return o.outputCapacity - o.builderState.outCount
 		}
 	}
-	o.builderState.outFinished = true
-	return count
+	return outCount - o.builderState.outCount
 }
 
-// build creates the cross product, and writes it to the output member.
-func (o *mergeJoinRightOuterOp) build() {
+// buildFromBatch builds as many output rows as possible from the groups that
+// were complete in the probing batches. New rows are put starting at
+// o.builderState.outCount position until either the capacity is reached or all
+// groups are processed.
+func (o *mergeJoinRightOuterOp) buildFromBatch() {
 	outStartIdx := o.builderState.outCount
-	switch o.builderState.buildFrom {
-	case mjBuildFromBatch:
-		o.builderState.outCount = o.calculateOutputCount(o.builderState.lGroups)
-		if o.output.Width() != 0 && o.builderState.outCount > outStartIdx {
-			// We will be actually building the output if we have columns in the output
-			// batch (meaning that we're not doing query like 'SELECT count(*) ...')
-			// and when builderState.outCount has increased (meaning that we have
-			// something to build).
-			colOffsetForRightGroups := 0
-			o.buildLeftGroupsFromBatch(o.builderState.lGroups, &o.left, o.proberState.lBatch, outStartIdx)
-			colOffsetForRightGroups = len(o.left.sourceTypes)
-			_ = colOffsetForRightGroups
-			o.buildRightGroupsFromBatch(o.builderState.rGroups, colOffsetForRightGroups, &o.right, o.proberState.rBatch, outStartIdx)
+	numBuilt := o.numBuiltFromBatch(o.builderState.lGroups)
+	o.builderState.outCount += numBuilt
+	if numBuilt > 0 && len(o.outputTypes) != 0 {
+		// We will be actually building the output if we have columns in the output
+		// batch (meaning that we're not doing query like 'SELECT count(*) ...')
+		// and when builderState.outCount has increased (meaning that we have
+		// something to build).
+		colOffsetForRightGroups := 0
+		o.buildLeftGroupsFromBatch(o.builderState.lGroups, &o.left, o.proberState.lBatch, outStartIdx)
+		colOffsetForRightGroups = len(o.left.sourceTypes)
+		_ = colOffsetForRightGroups
+		o.buildRightGroupsFromBatch(o.builderState.rGroups, colOffsetForRightGroups, &o.right, o.proberState.rBatch, outStartIdx)
+	}
+}
+
+// transitionIntoBuildingFromBufferedGroup should be called once we have
+// non-empty right buffered group in order to setup the buffered group builder.
+// It will complete the right buffered group (meaning it'll read all batches
+// from the right input until either the new group is found or the input is
+// exhausted).
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinRightOuterOp) transitionIntoBuildingFromBufferedGroup() {
+	if o.proberState.rIdx == o.proberState.rLength {
+		// The right buffered group might extend into the next batch, so we have
+		// to complete it first.
+		o.completeRightBufferedGroup()
+	}
+
+	o.bufferedGroup.helper.setupLeftBuilder()
+
+	startIdx := o.bufferedGroup.leftGroupStartIdx
+
+	o.bufferedGroup.helper.prepareForNextLeftBatch(o.proberState.lBatch, startIdx, o.proberState.lIdx)
+	o.state = mjBuildFromBufferedGroup
+}
+
+// buildFromBufferedGroup builds the output based on the current buffered group
+// and puts new tuples starting at position b.builderState.outCount. It returns
+// true once the output for the buffered group has been fully populated.
+// It is assumed that transitionIntoBuildingFromBufferedGroup has been called.
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinRightOuterOp) buildFromBufferedGroup() (bufferedGroupComplete bool) {
+	bg := &o.bufferedGroup
+	// Iterate until either we use up the whole capacity of the output batch or
+	// we complete the buffered group.
+	for {
+		if bg.helper.builderState.left.curSrcStartIdx == o.proberState.lLength {
+			// The output has been fully built from the current left batch.
+			bg.leftBatchDone = true
+		}
+		if bg.leftBatchDone {
+			// The current left batch has been fully processed with regards to
+			// the buffered group.
+			bg.leftBatchDone = false
+			if o.proberState.lIdx < o.proberState.lLength {
+				// The group on the left is finished within the current left
+				// batch.
+				return true
+			}
+			var skipLeftBufferedGroup bool
+			if skipLeftBufferedGroup {
+				// Keep fetching the next batch from the left input until we
+				// either find the start of the new group or we exhaust the
+				// input.
+				for o.proberState.lIdx == o.proberState.lLength && o.proberState.lLength > 0 {
+					o.continueLeftBufferedGroup()
+				}
+				return true
+			}
+			// Fetch the next batch from the left input and calculate the
+			// boundaries of the buffered group.
+			o.continueLeftBufferedGroup()
+			if o.proberState.lIdx == 0 {
+				return true
+			}
+			bg.helper.prepareForNextLeftBatch(
+				o.proberState.lBatch, bg.leftGroupStartIdx, o.proberState.lIdx,
+			)
 		}
 
-	case mjBuildFromBufferedGroup:
-		willEmit := o.builderState.totalOutCountFromBufferedGroup - o.builderState.alreadyEmittedFromBufferedGroup
-		if o.builderState.outCount+willEmit > o.output.Capacity() {
-			willEmit = o.output.Capacity() - o.builderState.outCount
+		willEmit := bg.helper.canEmit()
+		if o.builderState.outCount+willEmit > o.outputCapacity {
+			willEmit = o.outputCapacity - o.builderState.outCount
 		} else {
-			o.builderState.outFinished = true
+			bg.leftBatchDone = true
+		}
+		if willEmit > 0 && len(o.outputTypes) != 0 {
+			bg.helper.buildFromLeftInput(o.Ctx, o.builderState.outCount)
+			bg.helper.buildFromRightInput(o.Ctx, o.builderState.outCount)
 		}
 		o.builderState.outCount += willEmit
-		o.builderState.alreadyEmittedFromBufferedGroup += willEmit
-		if o.output.Width() != 0 && willEmit > 0 {
-			o.bufferedGroup.helper.buildFromLeftInput(o.Ctx, outStartIdx)
-			o.bufferedGroup.helper.buildFromRightInput(o.Ctx, outStartIdx)
+		bg.helper.builderState.numEmittedCurLeftBatch += willEmit
+		bg.helper.builderState.numEmittedTotal += willEmit
+		if o.builderState.outCount == o.outputCapacity {
+			return false
 		}
-
-	default:
-		colexecerror.InternalError(errors.AssertionFailedf("unsupported mjBuildFrom %d", o.builderState.buildFrom))
-
 	}
 }
 
@@ -12701,30 +13015,29 @@ func (o *mergeJoinRightOuterOp) Next() coldata.Batch {
 	o.output, _ = o.unlimitedAllocator.ResetMaybeReallocate(
 		o.outputTypes, o.output, 1 /* minDesiredCapacity */, o.memoryLimit,
 	)
+	o.outputCapacity = o.output.Capacity()
 	o.bufferedGroup.helper.output = o.output
+	o.builderState.outCount = 0
 	for {
 		switch o.state {
 		case mjEntry:
-			o.initProberState()
-
-			if o.nonEmptyBufferedGroup() {
-				o.state = mjFinishBufferedGroup
-				break
+			// If this is the first batch or we're done with the current batch,
+			// get the next batch.
+			if o.proberState.lBatch == nil || (o.proberState.lLength != 0 && o.proberState.lIdx == o.proberState.lLength) {
+				o.proberState.lIdx, o.proberState.lBatch = 0, o.left.source.Next()
+				o.proberState.lLength = o.proberState.lBatch.Length()
+			}
+			if o.proberState.rBatch == nil || (o.proberState.rLength != 0 && o.proberState.rIdx == o.proberState.rLength) {
+				o.proberState.rIdx, o.proberState.rBatch = 0, o.right.source.Next()
+				o.proberState.rLength = o.proberState.rBatch.Length()
 			}
-
 			if o.sourceFinished() {
 				o.state = mjSourceFinished
 				break
 			}
-
 			o.state = mjProbe
+
 		case mjSourceFinished:
-			o.outputReady = true
-			o.builderState.buildFrom = mjBuildFromBatch
-			// Next, we need to make sure that builder state is set up for a case when
-			// neither exhaustLeftSource nor exhaustRightSource is called below. In such
-			// scenario the merge joiner is done, so it'll be outputting zero-length
-			// batches from now on.
 			o.builderState.lGroups = o.builderState.lGroups[:0]
 			o.builderState.rGroups = o.builderState.rGroups[:0]
 			// At least one of the sources is finished. If it was the left one,
@@ -12733,49 +13046,51 @@ func (o *mergeJoinRightOuterOp) Next() coldata.Batch {
 			// finished, then there is nothing left to do.
 			if o.proberState.rIdx < o.proberState.rLength {
 				o.exhaustRightSource()
-				// We unset o.outputReady here because we want to put as many unmatched
-				// tuples from the right into the output batch. Once outCount reaches the
-				// desired output batch size, the output will be returned.
-				o.outputReady = false
 			}
-			o.state = mjBuild
-		case mjFinishBufferedGroup:
-			o.finishProbe()
-			o.setBuilderSourceToBufferedGroup()
-			o.state = mjBuild
+			if len(o.builderState.lGroups) == 0 && len(o.builderState.rGroups) == 0 {
+				o.state = mjDone
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
+			}
+			o.state = mjBuildFromBatch
+
 		case mjProbe:
 			o.probe()
-			o.setBuilderSourceToBatch()
-			o.state = mjBuild
-		case mjBuild:
-			o.build()
-
-			if o.builderState.outFinished {
+			o.builderState.lGroups, o.builderState.rGroups = o.groups.getGroups()
+			if len(o.builderState.lGroups) > 0 || len(o.builderState.rGroups) > 0 {
+				o.state = mjBuildFromBatch
+			} else if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
 				o.state = mjEntry
-				o.builderState.outFinished = false
 			}
 
-			if o.outputReady || o.builderState.outCount == o.output.Capacity() {
-				if o.builderState.outCount == 0 {
-					// We have already fully emitted the result of the join, so we
-					// transition to "finished" state.
-					o.state = mjDone
-					continue
-				}
+		case mjBuildFromBatch:
+			o.buildFromBatch()
+			if o.builderState.outCount == o.outputCapacity {
 				o.output.SetLength(o.builderState.outCount)
-				// Reset builder out count.
-				o.builderState.outCount = 0
-				o.outputReady = false
 				return o.output
 			}
-		case mjDone:
-			// Note that resetting of buffered group will close disk queues
-			// (if there are any).
-			if o.bufferedGroup.needToReset {
+			if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
+				o.state = mjEntry
+			}
+
+		case mjBuildFromBufferedGroup:
+			bufferedGroupComplete := o.buildFromBufferedGroup()
+			if bufferedGroupComplete {
 				o.bufferedGroup.helper.Reset(o.Ctx)
-				o.bufferedGroup.needToReset = false
+				o.state = mjEntry
 			}
+			if o.builderState.outCount == o.outputCapacity {
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
+			}
+
+		case mjDone:
 			return coldata.ZeroBatch
+
 		default:
 			colexecerror.InternalError(errors.AssertionFailedf("unexpected merge joiner state in Next: %v", o.state))
 		}
diff --git a/pkg/sql/colexec/colexecjoin/mergejoiner_rightsemi.eg.go b/pkg/sql/colexec/colexecjoin/mergejoiner_rightsemi.eg.go
index a864e99a40b7..d2cd18ca9289 100644
--- a/pkg/sql/colexec/colexecjoin/mergejoiner_rightsemi.eg.go
+++ b/pkg/sql/colexec/colexecjoin/mergejoiner_rightsemi.eg.go
@@ -48,13 +48,13 @@ var _ colexecop.Operator = &mergeJoinRightSemiOp{}
 func (o *mergeJoinRightSemiOp) probeBodyLSeltrueRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -77,21 +77,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -189,19 +191,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -219,10 +221,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -244,21 +251,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -332,19 +341,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -362,10 +371,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -387,21 +401,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -475,19 +491,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -505,10 +521,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -529,21 +550,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -650,19 +673,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -680,10 +703,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -701,21 +729,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -822,19 +852,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -852,10 +882,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -874,21 +909,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -995,19 +1032,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -1025,10 +1062,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -1050,21 +1092,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1195,19 +1239,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -1225,10 +1269,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -1250,21 +1299,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1359,19 +1410,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -1389,10 +1440,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -1414,21 +1470,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1502,19 +1560,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -1532,10 +1590,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -1557,21 +1620,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1663,19 +1728,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -1693,10 +1758,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -1718,21 +1788,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -1812,19 +1884,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -1842,10 +1914,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -1860,13 +1937,13 @@ EqLoop:
 func (o *mergeJoinRightSemiOp) probeBodyLSeltrueRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -1889,21 +1966,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2001,19 +2080,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -2031,10 +2110,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -2056,21 +2140,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2144,19 +2230,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -2174,10 +2260,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -2199,21 +2290,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2287,19 +2380,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -2317,10 +2410,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -2341,21 +2439,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2462,19 +2562,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -2492,10 +2592,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -2513,21 +2618,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2634,19 +2741,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -2664,10 +2771,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -2686,21 +2798,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -2807,19 +2921,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -2837,10 +2951,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -2862,21 +2981,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3007,19 +3128,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -3037,10 +3158,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -3062,21 +3188,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3171,19 +3299,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -3201,10 +3329,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -3226,21 +3359,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3314,19 +3449,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -3344,10 +3479,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -3369,21 +3509,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3475,19 +3617,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -3505,10 +3647,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -3530,21 +3677,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(lSel[curLIdx])
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3624,19 +3773,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -3654,10 +3803,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -3672,13 +3826,13 @@ EqLoop:
 func (o *mergeJoinRightSemiOp) probeBodyLSelfalseRSeltrue() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -3701,21 +3855,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3813,19 +3969,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -3843,10 +3999,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -3868,21 +4029,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -3956,19 +4119,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -3986,10 +4149,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -4011,21 +4179,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4099,19 +4269,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -4129,10 +4299,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -4153,21 +4328,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4274,19 +4451,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -4304,10 +4481,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -4325,21 +4507,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4446,19 +4630,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -4476,10 +4660,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -4498,21 +4687,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4619,19 +4810,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -4649,10 +4840,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -4674,21 +4870,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4819,19 +5017,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -4849,10 +5047,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -4874,21 +5077,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -4983,19 +5188,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -5013,10 +5218,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -5038,21 +5248,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5126,19 +5338,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -5156,10 +5368,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -5181,21 +5398,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5287,19 +5506,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -5317,10 +5536,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -5342,21 +5566,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(rSel[curRIdx])
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5436,19 +5662,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -5466,10 +5692,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -5484,13 +5715,13 @@ EqLoop:
 func (o *mergeJoinRightSemiOp) probeBodyLSelfalseRSelfalse() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		lNulls := lVec.Nulls()
 		rNulls := rVec.Nulls()
 		switch lVec.CanonicalTypeFamily() {
@@ -5513,21 +5744,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5625,19 +5858,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -5655,10 +5888,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.BytesFamily:
@@ -5680,21 +5918,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5768,19 +6008,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -5798,10 +6038,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.DecimalFamily:
@@ -5823,21 +6068,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -5911,19 +6158,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -5941,10 +6188,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntFamily:
@@ -5965,21 +6217,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6086,19 +6340,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -6116,10 +6370,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case 32:
 				lKeys := lVec.Int32()
@@ -6137,21 +6396,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6258,19 +6519,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -6288,10 +6549,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			case -1:
 			default:
@@ -6310,21 +6576,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6431,19 +6699,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -6461,10 +6729,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.FloatFamily:
@@ -6486,21 +6759,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6631,19 +6906,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -6661,10 +6936,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.TimestampTZFamily:
@@ -6686,21 +6966,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6795,19 +7077,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -6825,10 +7107,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.IntervalFamily:
@@ -6850,21 +7137,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -6938,19 +7227,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -6968,10 +7257,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case types.JsonFamily:
@@ -6993,21 +7287,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7099,19 +7395,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -7129,10 +7425,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		case typeconv.DatumVecCanonicalTypeFamily:
@@ -7154,21 +7455,23 @@ EqLoop:
 					curRIdx := rGroup.rowStartIdx
 					curLEndIdx := lGroup.rowEndIdx
 					curREndIdx := rGroup.rowEndIdx
-					areGroupsProcessed := false
 					// Expand or filter each group based on the current equality column.
-					for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+					for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 						cmp = 0
 						lNull := lNulls.NullAt(curLIdx)
 						rNull := rNulls.NullAt(curRIdx)
 
-						// TODO(yuzefovich): we can advance both sides if both are
-						// NULL.
+						curLIdxInc := 0
 						if lNull {
-							curLIdx++
-							continue
+							curLIdxInc = 1
 						}
+						curRIdxInc := 0
 						if rNull {
-							curRIdx++
+							curRIdxInc = 1
+						}
+						if lNull || rNull {
+							curLIdx += curLIdxInc
+							curRIdx += curRIdxInc
 							continue
 						}
 
@@ -7248,19 +7551,19 @@ EqLoop:
 								curRIdx++
 							}
 
-							// Last equality column and either group is incomplete. Save state
-							// and have it handled in the next iteration.
-							if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-								o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+							// Last equality column and either group is incomplete.
+							if lastEqCol && (!lComplete || !rComplete) {
+								// Store the state about the buffered group.
+								o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+								o.bufferedGroup.leftGroupStartIdx = beginLIdx
 								o.proberState.lIdx = lGroupLength + beginLIdx
-								o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+								o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 								o.proberState.rIdx = rGroupLength + beginRIdx
-
 								o.groups.finishedCol()
-								break EqLoop
+								return
 							}
 
-							if eqColIdx < len(o.left.eqCols)-1 {
+							if !lastEqCol {
 								o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 							} else {
 								o.groups.addRightSemiGroup(beginRIdx, rGroupLength)
@@ -7278,10 +7581,15 @@ EqLoop:
 							}
 						}
 					}
-					// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-					// last elements processed in their respective batches.
-					o.proberState.lIdx = curLIdx
-					o.proberState.rIdx = curRIdx
+					// Both o.proberState.lIdx and o.proberState.rIdx should point
+					// to the last tuples that have been fully processed in their
+					// respective batches. This is the case when we've just finished
+					// the last equality column or the current column is such that
+					// all tuples were filtered out.
+					if lastEqCol || !o.groups.hasGroupForNextCol() {
+						o.proberState.lIdx = curLIdx
+						o.proberState.rIdx = curRIdx
+					}
 				}
 			}
 		default:
@@ -7322,7 +7630,6 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[:len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		LeftColLoop:
 			for colIdx := range input.sourceTypes {
@@ -7367,8 +7674,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7429,8 +7736,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7491,8 +7798,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7552,8 +7859,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7610,8 +7917,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7669,8 +7976,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7731,8 +8038,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7793,8 +8100,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7855,8 +8162,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7917,8 +8224,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -7979,8 +8286,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8050,8 +8357,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8111,8 +8418,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8172,8 +8479,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8232,8 +8539,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8289,8 +8596,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8347,8 +8654,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8408,8 +8715,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8469,8 +8776,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8530,8 +8837,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8591,8 +8898,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8652,8 +8959,8 @@ func (o *mergeJoinRightSemiOp) buildLeftGroupsFromBatch(
 
 									repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 									toAppend := repeatsLeft
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8724,7 +9031,6 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[colOffset:colOffset+len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		RightColLoop:
 			for colIdx := range input.sourceTypes {
@@ -8760,8 +9066,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8827,8 +9133,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8894,8 +9200,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -8960,8 +9266,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9023,8 +9329,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9087,8 +9393,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9154,8 +9460,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9221,8 +9527,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9288,8 +9594,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9355,8 +9661,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9422,8 +9728,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9499,8 +9805,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9566,8 +9872,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9633,8 +9939,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9699,8 +10005,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9762,8 +10068,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9826,8 +10132,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9893,8 +10199,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -9960,8 +10266,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10027,8 +10333,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10094,8 +10400,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10161,8 +10467,8 @@ func (o *mergeJoinRightSemiOp) buildRightGroupsFromBatch(
 										o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 									}
 									toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-									if outStartIdx+toAppend > outputCapacity {
-										toAppend = outputCapacity - outStartIdx
+									if outStartIdx+toAppend > o.outputCapacity {
+										toAppend = o.outputCapacity - outStartIdx
 									}
 
 									{
@@ -10246,20 +10552,6 @@ func (o *mergeJoinRightSemiOp) probe() {
 	}
 }
 
-// setBuilderSourceToBufferedGroup sets up the builder state to use the
-// buffered group.
-func (o *mergeJoinRightSemiOp) setBuilderSourceToBufferedGroup() {
-	o.builderState.buildFrom = mjBuildFromBufferedGroup
-	o.bufferedGroup.helper.setupBuilder()
-	o.builderState.totalOutCountFromBufferedGroup = o.bufferedGroup.helper.calculateOutputCount()
-	o.builderState.alreadyEmittedFromBufferedGroup = 0
-
-	// We cannot yet reset the buffered groups because the builder will be taking
-	// input from them. The actual reset will take place on the next call to
-	// initProberState().
-	o.bufferedGroup.needToReset = true
-}
-
 // exhaustLeftSource sets up the builder to process any remaining tuples from
 // the left source. It should only be called when the right source has been
 // exhausted.
@@ -10274,58 +10566,129 @@ func (o *mergeJoinRightSemiOp) exhaustRightSource() {
 	// ignored in all joins except for RIGHT OUTER and FULL OUTER.
 }
 
-// calculateOutputCount uses the toBuild field of each group and the output
-// batch size to determine the output count. Note that as soon as a group is
+// numBuiltFromBatch uses the toBuild field of each group and the output
+// capacity to determine the output count. Note that as soon as a group is
 // materialized partially or fully to output, its toBuild field is updated
-// accordingly.
-func (o *mergeJoinRightSemiOp) calculateOutputCount(groups []group) int {
-	count := o.builderState.outCount
-	outputCapacity := o.output.Capacity()
-
+// accordingly. The number of tuples that will be built from batch during the
+// current iteration is returned.
+func (o *mergeJoinRightSemiOp) numBuiltFromBatch(groups []group) (numBuilt int) {
+	outCount := o.builderState.outCount
 	for i := 0; i < len(groups); i++ {
-		count += groups[i].toBuild
+		outCount += groups[i].toBuild
 		groups[i].toBuild = 0
-		if count > o.output.Capacity() {
-			groups[i].toBuild = count - outputCapacity
-			count = outputCapacity
-			return count
+		if outCount > o.outputCapacity {
+			groups[i].toBuild = outCount - o.outputCapacity
+			return o.outputCapacity - o.builderState.outCount
 		}
 	}
-	o.builderState.outFinished = true
-	return count
+	return outCount - o.builderState.outCount
 }
 
-// build creates the cross product, and writes it to the output member.
-func (o *mergeJoinRightSemiOp) build() {
+// buildFromBatch builds as many output rows as possible from the groups that
+// were complete in the probing batches. New rows are put starting at
+// o.builderState.outCount position until either the capacity is reached or all
+// groups are processed.
+func (o *mergeJoinRightSemiOp) buildFromBatch() {
 	outStartIdx := o.builderState.outCount
-	switch o.builderState.buildFrom {
-	case mjBuildFromBatch:
-		o.builderState.outCount = o.calculateOutputCount(o.builderState.rGroups)
-		if o.output.Width() != 0 && o.builderState.outCount > outStartIdx {
-			// We will be actually building the output if we have columns in the output
-			// batch (meaning that we're not doing query like 'SELECT count(*) ...')
-			// and when builderState.outCount has increased (meaning that we have
-			// something to build).
-			colOffsetForRightGroups := 0
-			o.buildRightGroupsFromBatch(o.builderState.rGroups, colOffsetForRightGroups, &o.right, o.proberState.rBatch, outStartIdx)
+	numBuilt := o.numBuiltFromBatch(o.builderState.rGroups)
+	o.builderState.outCount += numBuilt
+	if numBuilt > 0 && len(o.outputTypes) != 0 {
+		// We will be actually building the output if we have columns in the output
+		// batch (meaning that we're not doing query like 'SELECT count(*) ...')
+		// and when builderState.outCount has increased (meaning that we have
+		// something to build).
+		colOffsetForRightGroups := 0
+		o.buildRightGroupsFromBatch(o.builderState.rGroups, colOffsetForRightGroups, &o.right, o.proberState.rBatch, outStartIdx)
+	}
+}
+
+// transitionIntoBuildingFromBufferedGroup should be called once we have
+// non-empty right buffered group in order to setup the buffered group builder.
+// It will complete the right buffered group (meaning it'll read all batches
+// from the right input until either the new group is found or the input is
+// exhausted).
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinRightSemiOp) transitionIntoBuildingFromBufferedGroup() {
+	if o.proberState.rIdx == o.proberState.rLength {
+		// The right buffered group might extend into the next batch, so we have
+		// to complete it first.
+		o.completeRightBufferedGroup()
+	}
+
+	o.bufferedGroup.helper.setupLeftBuilder()
+
+	startIdx := o.bufferedGroup.leftGroupStartIdx
+
+	o.bufferedGroup.helper.prepareForNextLeftBatch(o.proberState.lBatch, startIdx, o.proberState.lIdx)
+	o.state = mjBuildFromBufferedGroup
+}
+
+// buildFromBufferedGroup builds the output based on the current buffered group
+// and puts new tuples starting at position b.builderState.outCount. It returns
+// true once the output for the buffered group has been fully populated.
+// It is assumed that transitionIntoBuildingFromBufferedGroup has been called.
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoinRightSemiOp) buildFromBufferedGroup() (bufferedGroupComplete bool) {
+	bg := &o.bufferedGroup
+	// Iterate until either we use up the whole capacity of the output batch or
+	// we complete the buffered group.
+	for {
+		if bg.helper.builderState.left.curSrcStartIdx == o.proberState.lLength {
+			// The output has been fully built from the current left batch.
+			bg.leftBatchDone = true
+		}
+		if bg.leftBatchDone {
+			// The current left batch has been fully processed with regards to
+			// the buffered group.
+			bg.leftBatchDone = false
+			if o.proberState.lIdx < o.proberState.lLength {
+				// The group on the left is finished within the current left
+				// batch.
+				return true
+			}
+			var skipLeftBufferedGroup bool
+			// For RIGHT SEMI joins we have already fully built the output based
+			// on all tuples in the right buffered group using the match from
+			// the current left batch. This allows us to simply skip all tuples
+			// that are part of the left buffered group.
+			skipLeftBufferedGroup = true
+			if skipLeftBufferedGroup {
+				// Keep fetching the next batch from the left input until we
+				// either find the start of the new group or we exhaust the
+				// input.
+				for o.proberState.lIdx == o.proberState.lLength && o.proberState.lLength > 0 {
+					o.continueLeftBufferedGroup()
+				}
+				return true
+			}
+			// Fetch the next batch from the left input and calculate the
+			// boundaries of the buffered group.
+			o.continueLeftBufferedGroup()
+			if o.proberState.lIdx == 0 {
+				return true
+			}
+			bg.helper.prepareForNextLeftBatch(
+				o.proberState.lBatch, bg.leftGroupStartIdx, o.proberState.lIdx,
+			)
 		}
 
-	case mjBuildFromBufferedGroup:
-		willEmit := o.builderState.totalOutCountFromBufferedGroup - o.builderState.alreadyEmittedFromBufferedGroup
-		if o.builderState.outCount+willEmit > o.output.Capacity() {
-			willEmit = o.output.Capacity() - o.builderState.outCount
+		willEmit := bg.helper.canEmit()
+		if o.builderState.outCount+willEmit > o.outputCapacity {
+			willEmit = o.outputCapacity - o.builderState.outCount
 		} else {
-			o.builderState.outFinished = true
+			bg.leftBatchDone = true
+		}
+		if willEmit > 0 && len(o.outputTypes) != 0 {
+			bg.helper.buildFromRightInput(o.Ctx, o.builderState.outCount)
 		}
 		o.builderState.outCount += willEmit
-		o.builderState.alreadyEmittedFromBufferedGroup += willEmit
-		if o.output.Width() != 0 && willEmit > 0 {
-			o.bufferedGroup.helper.buildFromRightInput(o.Ctx, outStartIdx)
+		bg.helper.builderState.numEmittedCurLeftBatch += willEmit
+		bg.helper.builderState.numEmittedTotal += willEmit
+		if o.builderState.outCount == o.outputCapacity {
+			return false
 		}
-
-	default:
-		colexecerror.InternalError(errors.AssertionFailedf("unsupported mjBuildFrom %d", o.builderState.buildFrom))
-
 	}
 }
 
@@ -10333,65 +10696,75 @@ func (o *mergeJoinRightSemiOp) Next() coldata.Batch {
 	o.output, _ = o.unlimitedAllocator.ResetMaybeReallocate(
 		o.outputTypes, o.output, 1 /* minDesiredCapacity */, o.memoryLimit,
 	)
+	o.outputCapacity = o.output.Capacity()
 	o.bufferedGroup.helper.output = o.output
+	o.builderState.outCount = 0
 	for {
 		switch o.state {
 		case mjEntry:
-			o.initProberState()
-
-			if o.nonEmptyBufferedGroup() {
-				o.state = mjFinishBufferedGroup
-				break
+			// If this is the first batch or we're done with the current batch,
+			// get the next batch.
+			if o.proberState.lBatch == nil || (o.proberState.lLength != 0 && o.proberState.lIdx == o.proberState.lLength) {
+				o.proberState.lIdx, o.proberState.lBatch = 0, o.left.source.Next()
+				o.proberState.lLength = o.proberState.lBatch.Length()
+			}
+			if o.proberState.rBatch == nil || (o.proberState.rLength != 0 && o.proberState.rIdx == o.proberState.rLength) {
+				o.proberState.rIdx, o.proberState.rBatch = 0, o.right.source.Next()
+				o.proberState.rLength = o.proberState.rBatch.Length()
 			}
-
 			if o.sourceFinished() {
 				o.state = mjSourceFinished
 				break
 			}
-
 			o.state = mjProbe
+
 		case mjSourceFinished:
-			o.outputReady = true
-			o.builderState.buildFrom = mjBuildFromBatch
-			o.setBuilderSourceToBufferedGroup()
-			o.state = mjBuild
-		case mjFinishBufferedGroup:
-			o.finishProbe()
-			o.setBuilderSourceToBufferedGroup()
-			o.state = mjBuild
+			o.builderState.lGroups = o.builderState.lGroups[:0]
+			o.builderState.rGroups = o.builderState.rGroups[:0]
+			if len(o.builderState.lGroups) == 0 && len(o.builderState.rGroups) == 0 {
+				o.state = mjDone
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
+			}
+			o.state = mjBuildFromBatch
+
 		case mjProbe:
 			o.probe()
-			o.setBuilderSourceToBatch()
-			o.state = mjBuild
-		case mjBuild:
-			o.build()
-
-			if o.builderState.outFinished {
+			o.builderState.lGroups, o.builderState.rGroups = o.groups.getGroups()
+			if len(o.builderState.lGroups) > 0 || len(o.builderState.rGroups) > 0 {
+				o.state = mjBuildFromBatch
+			} else if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
 				o.state = mjEntry
-				o.builderState.outFinished = false
 			}
 
-			if o.outputReady || o.builderState.outCount == o.output.Capacity() {
-				if o.builderState.outCount == 0 {
-					// We have already fully emitted the result of the join, so we
-					// transition to "finished" state.
-					o.state = mjDone
-					continue
-				}
+		case mjBuildFromBatch:
+			o.buildFromBatch()
+			if o.builderState.outCount == o.outputCapacity {
 				o.output.SetLength(o.builderState.outCount)
-				// Reset builder out count.
-				o.builderState.outCount = 0
-				o.outputReady = false
 				return o.output
 			}
-		case mjDone:
-			// Note that resetting of buffered group will close disk queues
-			// (if there are any).
-			if o.bufferedGroup.needToReset {
+			if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
+				o.state = mjEntry
+			}
+
+		case mjBuildFromBufferedGroup:
+			bufferedGroupComplete := o.buildFromBufferedGroup()
+			if bufferedGroupComplete {
 				o.bufferedGroup.helper.Reset(o.Ctx)
-				o.bufferedGroup.needToReset = false
+				o.state = mjEntry
 			}
+			if o.builderState.outCount == o.outputCapacity {
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
+			}
+
+		case mjDone:
 			return coldata.ZeroBatch
+
 		default:
 			colexecerror.InternalError(errors.AssertionFailedf("unexpected merge joiner state in Next: %v", o.state))
 		}
diff --git a/pkg/sql/colexec/colexecjoin/mergejoiner_tmpl.go b/pkg/sql/colexec/colexecjoin/mergejoiner_tmpl.go
index d509a9b0ac40..ff29ce6837f9 100644
--- a/pkg/sql/colexec/colexecjoin/mergejoiner_tmpl.go
+++ b/pkg/sql/colexec/colexecjoin/mergejoiner_tmpl.go
@@ -129,11 +129,10 @@ func _PROBE_SWITCH(_JOIN_TYPE joinTypeInfo, _SEL_PERMUTATION selPermutation) { /
 				curRIdx := rGroup.rowStartIdx
 				curLEndIdx := lGroup.rowEndIdx
 				curREndIdx := rGroup.rowEndIdx
-				areGroupsProcessed := false
 				_LEFT_UNMATCHED_GROUP_SWITCH(_JOIN_TYPE)
 				_RIGHT_UNMATCHED_GROUP_SWITCH(_JOIN_TYPE)
 				// Expand or filter each group based on the current equality column.
-				for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
+				for curLIdx < curLEndIdx && curRIdx < curREndIdx {
 					cmp = 0
 					lNull := lNulls.NullAt(_L_SEL_IND)
 					rNull := rNulls.NullAt(_R_SEL_IND)
@@ -163,16 +162,19 @@ func _PROBE_SWITCH(_JOIN_TYPE joinTypeInfo, _SEL_PERMUTATION selPermutation) { /
 					//     so if either value is NULL, the tuples are not
 					//     matches.
 					// */}}
-					// TODO(yuzefovich): we can advance both sides if both are
-					// NULL.
+					curLIdxInc := 0
 					if lNull {
 						_NULL_FROM_LEFT_SWITCH(_JOIN_TYPE)
-						curLIdx++
-						continue
+						curLIdxInc = 1
 					}
+					curRIdxInc := 0
 					if rNull {
 						_NULL_FROM_RIGHT_SWITCH(_JOIN_TYPE)
-						curRIdx++
+						curRIdxInc = 1
+					}
+					if lNull || rNull {
+						curLIdx += curLIdxInc
+						curRIdx += curRIdxInc
 						continue
 					}
 					// {{end}}
@@ -267,19 +269,19 @@ func _PROBE_SWITCH(_JOIN_TYPE joinTypeInfo, _SEL_PERMUTATION selPermutation) { /
 							curRIdx++
 						}
 
-						// Last equality column and either group is incomplete. Save state
-						// and have it handled in the next iteration.
-						if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
-							o.appendToBufferedGroup(&o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
+						// Last equality column and either group is incomplete.
+						if lastEqCol && (!lComplete || !rComplete) {
+							// Store the state about the buffered group.
+							o.startLeftBufferedGroup(lSel, beginLIdx, lGroupLength)
+							o.bufferedGroup.leftGroupStartIdx = beginLIdx
 							o.proberState.lIdx = lGroupLength + beginLIdx
-							o.appendToBufferedGroup(&o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
+							o.appendToRightBufferedGroup(rSel, beginRIdx, rGroupLength)
 							o.proberState.rIdx = rGroupLength + beginRIdx
-
 							o.groups.finishedCol()
-							break EqLoop
+							return
 						}
 
-						if eqColIdx < len(o.left.eqCols)-1 {
+						if !lastEqCol {
 							o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
 						} else {
 							// {{if _JOIN_TYPE.IsLeftSemi}}
@@ -331,10 +333,15 @@ func _PROBE_SWITCH(_JOIN_TYPE joinTypeInfo, _SEL_PERMUTATION selPermutation) { /
 					}
 				}
 				_PROCESS_NOT_LAST_GROUP_IN_COLUMN_SWITCH(_JOIN_TYPE)
-				// Both o.proberState.lIdx and o.proberState.rIdx should point to the
-				// last elements processed in their respective batches.
-				o.proberState.lIdx = curLIdx
-				o.proberState.rIdx = curRIdx
+				// Both o.proberState.lIdx and o.proberState.rIdx should point
+				// to the last tuples that have been fully processed in their
+				// respective batches. This is the case when we've just finished
+				// the last equality column or the current column is such that
+				// all tuples were filtered out.
+				if lastEqCol || !o.groups.hasGroupForNextCol() {
+					o.proberState.lIdx = curLIdx
+					o.proberState.rIdx = curRIdx
+				}
 			}
 			// {{end}}
 		}
@@ -361,14 +368,13 @@ func _LEFT_UNMATCHED_GROUP_SWITCH(_JOIN_TYPE joinTypeInfo) { // */}}
 	// {{end}}
 	// {{if or $.JoinType.IsLeftOuter $.JoinType.IsLeftAnti}}
 	if lGroup.unmatched {
-		if curLIdx+1 != curLEndIdx {
-			colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
-		}
 		// The row already does not have a match, so we don't need to do any
 		// additional processing.
 		o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
-		curLIdx++
-		areGroupsProcessed = true
+		if lastEqCol && curLIdx >= o.proberState.lIdx {
+			o.proberState.lIdx = curLIdx + 1
+		}
+		continue
 	}
 	// {{end}}
 	// {{if or $.JoinType.IsRightOuter $.JoinType.IsRightAnti}}
@@ -403,14 +409,13 @@ func _RIGHT_UNMATCHED_GROUP_SWITCH(_JOIN_TYPE joinTypeInfo) { // */}}
 	// {{end}}
 	// {{if or $.JoinType.IsRightOuter $.JoinType.IsRightAnti}}
 	if rGroup.unmatched {
-		if curRIdx+1 != curREndIdx {
-			colexecerror.InternalError(errors.AssertionFailedf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
-		}
 		// The row already does not have a match, so we don't need to do any
 		// additional processing.
 		o.groups.addRightUnmatchedGroup(curLIdx, curRIdx)
-		curRIdx++
-		areGroupsProcessed = true
+		if lastEqCol && curRIdx >= o.proberState.rIdx {
+			o.proberState.rIdx = curRIdx + 1
+		}
+		continue
 	}
 	// {{end}}
 	// {{end}}
@@ -605,7 +610,7 @@ func _PROCESS_NOT_LAST_GROUP_IN_COLUMN_SWITCH(_JOIN_TYPE joinTypeInfo) { // */}}
 	// */}}
 	// {{end}}
 	// {{if or $.JoinType.IsLeftOuter $.JoinType.IsLeftAnti}}
-	if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+	if !o.groups.isLastGroupInCol() {
 		// The current group is not the last one within the column, so it cannot be
 		// extended into the next batch, and we need to process it right now. Any
 		// unprocessed row in the left group will not get a match, so each one of
@@ -617,7 +622,7 @@ func _PROCESS_NOT_LAST_GROUP_IN_COLUMN_SWITCH(_JOIN_TYPE joinTypeInfo) { // */}}
 	}
 	// {{end}}
 	// {{if or $.JoinType.IsRightOuter $.JoinType.IsRightAnti}}
-	if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
+	if !o.groups.isLastGroupInCol() {
 		// The current group is not the last one within the column, so it cannot be
 		// extended into the next batch, and we need to process it right now. Any
 		// unprocessed row in the right group will not get a match, so each one of
@@ -638,13 +643,13 @@ func _PROCESS_NOT_LAST_GROUP_IN_COLUMN_SWITCH(_JOIN_TYPE joinTypeInfo) { // */}}
 func (o *mergeJoin_JOIN_TYPE_STRINGOp) probeBodyLSel_IS_L_SELRSel_IS_R_SEL() {
 	lSel := o.proberState.lBatch.Selection()
 	rSel := o.proberState.rBatch.Selection()
-EqLoop:
 	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
 		leftColIdx := o.left.eqCols[eqColIdx]
 		rightColIdx := o.right.eqCols[eqColIdx]
 		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
 		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
 		colType := o.left.sourceTypes[leftColIdx]
+		lastEqCol := eqColIdx == len(o.left.eqCols)-1
 		_PROBE_SWITCH(_JOIN_TYPE, _SEL_ARG)
 		// Look at the groups associated with the next equality column by moving
 		// the circular buffer pointer up.
@@ -706,8 +711,8 @@ func _LEFT_SWITCH(_JOIN_TYPE joinTypeInfo, _HAS_SELECTION bool) { // */}}
 
 					repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
 					toAppend := repeatsLeft
-					if outStartIdx+toAppend > outputCapacity {
-						toAppend = outputCapacity - outStartIdx
+					if outStartIdx+toAppend > o.outputCapacity {
+						toAppend = o.outputCapacity - outStartIdx
 					}
 
 					// {{if or _JOIN_TYPE.IsRightOuter _JOIN_TYPE.IsRightAnti}}
@@ -791,7 +796,6 @@ func (o *mergeJoin_JOIN_TYPE_STRINGOp) buildLeftGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[:len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		LeftColLoop:
 			for colIdx := range input.sourceTypes {
@@ -844,8 +848,8 @@ func _RIGHT_SWITCH(_JOIN_TYPE joinTypeInfo, _HAS_SELECTION bool) { // */}}
 						o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
 					}
 					toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
-					if outStartIdx+toAppend > outputCapacity {
-						toAppend = outputCapacity - outStartIdx
+					if outStartIdx+toAppend > o.outputCapacity {
+						toAppend = o.outputCapacity - outStartIdx
 					}
 
 					// {{if _JOIN_TYPE.IsLeftOuter}}
@@ -944,7 +948,6 @@ func (o *mergeJoin_JOIN_TYPE_STRINGOp) buildRightGroupsFromBatch(
 	o.unlimitedAllocator.PerformOperation(
 		o.output.ColVecs()[colOffset:colOffset+len(input.sourceTypes)],
 		func() {
-			outputCapacity := o.output.Capacity()
 			// Loop over every column.
 		RightColLoop:
 			for colIdx := range input.sourceTypes {
@@ -992,20 +995,6 @@ func (o *mergeJoin_JOIN_TYPE_STRINGOp) probe() {
 	}
 }
 
-// setBuilderSourceToBufferedGroup sets up the builder state to use the
-// buffered group.
-func (o *mergeJoin_JOIN_TYPE_STRINGOp) setBuilderSourceToBufferedGroup() {
-	o.builderState.buildFrom = mjBuildFromBufferedGroup
-	o.bufferedGroup.helper.setupBuilder()
-	o.builderState.totalOutCountFromBufferedGroup = o.bufferedGroup.helper.calculateOutputCount()
-	o.builderState.alreadyEmittedFromBufferedGroup = 0
-
-	// We cannot yet reset the buffered groups because the builder will be taking
-	// input from them. The actual reset will take place on the next call to
-	// initProberState().
-	o.bufferedGroup.needToReset = true
-}
-
 // exhaustLeftSource sets up the builder to process any remaining tuples from
 // the left source. It should only be called when the right source has been
 // exhausted.
@@ -1082,14 +1071,13 @@ func (o *mergeJoin_JOIN_TYPE_STRINGOp) exhaustRightSource() {
 	// {{end}}
 }
 
-// calculateOutputCount uses the toBuild field of each group and the output
-// batch size to determine the output count. Note that as soon as a group is
+// numBuiltFromBatch uses the toBuild field of each group and the output
+// capacity to determine the output count. Note that as soon as a group is
 // materialized partially or fully to output, its toBuild field is updated
-// accordingly.
-func (o *mergeJoin_JOIN_TYPE_STRINGOp) calculateOutputCount(groups []group) int {
-	count := o.builderState.outCount
-	outputCapacity := o.output.Capacity()
-
+// accordingly. The number of tuples that will be built from batch during the
+// current iteration is returned.
+func (o *mergeJoin_JOIN_TYPE_STRINGOp) numBuiltFromBatch(groups []group) (numBuilt int) {
+	outCount := o.builderState.outCount
 	for i := 0; i < len(groups); i++ {
 		// {{if or _JOIN_TYPE.IsLeftAnti _JOIN_TYPE.IsRightAnti}}
 		if !groups[i].unmatched {
@@ -1099,65 +1087,190 @@ func (o *mergeJoin_JOIN_TYPE_STRINGOp) calculateOutputCount(groups []group) int
 			continue
 		}
 		// {{end}}
-		count += groups[i].toBuild
+		outCount += groups[i].toBuild
 		groups[i].toBuild = 0
-		if count > o.output.Capacity() {
-			groups[i].toBuild = count - outputCapacity
-			count = outputCapacity
-			return count
+		if outCount > o.outputCapacity {
+			groups[i].toBuild = outCount - o.outputCapacity
+			return o.outputCapacity - o.builderState.outCount
 		}
 	}
-	o.builderState.outFinished = true
-	return count
+	return outCount - o.builderState.outCount
 }
 
-// build creates the cross product, and writes it to the output member.
-func (o *mergeJoin_JOIN_TYPE_STRINGOp) build() {
+// buildFromBatch builds as many output rows as possible from the groups that
+// were complete in the probing batches. New rows are put starting at
+// o.builderState.outCount position until either the capacity is reached or all
+// groups are processed.
+func (o *mergeJoin_JOIN_TYPE_STRINGOp) buildFromBatch() {
 	outStartIdx := o.builderState.outCount
-	switch o.builderState.buildFrom {
-	case mjBuildFromBatch:
-		// {{if or _JOIN_TYPE.IsRightSemi _JOIN_TYPE.IsRightAnti}}
-		o.builderState.outCount = o.calculateOutputCount(o.builderState.rGroups)
-		// {{else}}
-		o.builderState.outCount = o.calculateOutputCount(o.builderState.lGroups)
+	// {{if or _JOIN_TYPE.IsRightSemi _JOIN_TYPE.IsRightAnti}}
+	numBuilt := o.numBuiltFromBatch(o.builderState.rGroups)
+	// {{else}}
+	numBuilt := o.numBuiltFromBatch(o.builderState.lGroups)
+	// {{end}}
+	o.builderState.outCount += numBuilt
+	if numBuilt > 0 && len(o.outputTypes) != 0 {
+		// We will be actually building the output if we have columns in the output
+		// batch (meaning that we're not doing query like 'SELECT count(*) ...')
+		// and when builderState.outCount has increased (meaning that we have
+		// something to build).
+		colOffsetForRightGroups := 0
+		// {{if not (or _JOIN_TYPE.IsRightSemi _JOIN_TYPE.IsRightAnti)}}
+		o.buildLeftGroupsFromBatch(o.builderState.lGroups, &o.left, o.proberState.lBatch, outStartIdx)
+		colOffsetForRightGroups = len(o.left.sourceTypes)
+		_ = colOffsetForRightGroups
 		// {{end}}
-		if o.output.Width() != 0 && o.builderState.outCount > outStartIdx {
-			// We will be actually building the output if we have columns in the output
-			// batch (meaning that we're not doing query like 'SELECT count(*) ...')
-			// and when builderState.outCount has increased (meaning that we have
-			// something to build).
-			colOffsetForRightGroups := 0
-			// {{if not (or _JOIN_TYPE.IsRightSemi _JOIN_TYPE.IsRightAnti)}}
-			o.buildLeftGroupsFromBatch(o.builderState.lGroups, &o.left, o.proberState.lBatch, outStartIdx)
-			colOffsetForRightGroups = len(o.left.sourceTypes)
-			_ = colOffsetForRightGroups
-			// {{end}}
-			// {{if not (or _JOIN_TYPE.IsLeftSemi _JOIN_TYPE.IsLeftAnti)}}
-			o.buildRightGroupsFromBatch(o.builderState.rGroups, colOffsetForRightGroups, &o.right, o.proberState.rBatch, outStartIdx)
+		// {{if not (or _JOIN_TYPE.IsLeftSemi _JOIN_TYPE.IsLeftAnti)}}
+		o.buildRightGroupsFromBatch(o.builderState.rGroups, colOffsetForRightGroups, &o.right, o.proberState.rBatch, outStartIdx)
+		// {{end}}
+	}
+}
+
+// transitionIntoBuildingFromBufferedGroup should be called once we have
+// non-empty right buffered group in order to setup the buffered group builder.
+//
+// It will complete the right buffered group (meaning it'll read all batches
+// from the right input until either the new group is found or the input is
+// exhausted).
+//
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoin_JOIN_TYPE_STRINGOp) transitionIntoBuildingFromBufferedGroup() {
+	if o.proberState.rIdx == o.proberState.rLength {
+		// The right buffered group might extend into the next batch, so we have
+		// to complete it first.
+		o.completeRightBufferedGroup()
+	}
+
+	o.bufferedGroup.helper.setupLeftBuilder()
+
+	// {{if and _JOIN_TYPE.IsLeftAnti _JOIN_TYPE.IsSetOp}}
+	// For EXCEPT ALL joins we build # left tuples - # right tuples output rows
+	// (if positive), so we have to discard first numRightTuples rows from the
+	// left.
+	numSkippedLeft := 0
+	for {
+		groupLength := o.proberState.lIdx - o.bufferedGroup.leftGroupStartIdx
+		if numSkippedLeft+groupLength > o.bufferedGroup.helper.numRightTuples {
+			// The current left batch is the first one that contains tuples
+			// without a "match".
+			break
+		}
+		numSkippedLeft += groupLength
+		var groupFinished bool
+		if o.proberState.lIdx < o.proberState.lLength {
+			// The group on the left is finished within the current left
+			// batch.
+			groupFinished = true
+		} else {
+			// Fetch the next batch from the left input and calculate the
+			// boundaries of the buffered group.
+			o.continueLeftBufferedGroup()
+			groupFinished = o.proberState.lIdx == 0
+		}
+		if groupFinished {
+			// We have less matching tuples on the left than on the right, so we
+			// don't emit any output for this buffered group.
+			o.bufferedGroup.helper.Reset(o.Ctx)
+			o.state = mjEntry
+			return
+		}
+	}
+	// We might need to skip some tuples in the current left batch since they
+	// still had matches with the right side.
+	toSkipInThisBatch := o.bufferedGroup.helper.numRightTuples - numSkippedLeft
+	startIdx := o.bufferedGroup.leftGroupStartIdx + toSkipInThisBatch
+	// {{else}}
+	startIdx := o.bufferedGroup.leftGroupStartIdx
+	// {{end}}
+
+	o.bufferedGroup.helper.prepareForNextLeftBatch(o.proberState.lBatch, startIdx, o.proberState.lIdx)
+	o.state = mjBuildFromBufferedGroup
+}
+
+// buildFromBufferedGroup builds the output based on the current buffered group
+// and puts new tuples starting at position b.builderState.outCount. It returns
+// true once the output for the buffered group has been fully populated.
+//
+// It is assumed that transitionIntoBuildingFromBufferedGroup has been called.
+//
+// For a more detailed explanation and an example please refer to the comment at
+// the top of mergejoiner.go.
+func (o *mergeJoin_JOIN_TYPE_STRINGOp) buildFromBufferedGroup() (bufferedGroupComplete bool) {
+	bg := &o.bufferedGroup
+	// Iterate until either we use up the whole capacity of the output batch or
+	// we complete the buffered group.
+	for {
+		if bg.helper.builderState.left.curSrcStartIdx == o.proberState.lLength {
+			// The output has been fully built from the current left batch.
+			bg.leftBatchDone = true
+		}
+		if bg.leftBatchDone {
+			// The current left batch has been fully processed with regards to
+			// the buffered group.
+			bg.leftBatchDone = false
+			if o.proberState.lIdx < o.proberState.lLength {
+				// The group on the left is finished within the current left
+				// batch.
+				return true
+			}
+			var skipLeftBufferedGroup bool
+			// {{if _JOIN_TYPE.IsRightSemi}}
+			// For RIGHT SEMI joins we have already fully built the output based
+			// on all tuples in the right buffered group using the match from
+			// the current left batch. This allows us to simply skip all tuples
+			// that are part of the left buffered group.
+			skipLeftBufferedGroup = true
+			// {{else if and _JOIN_TYPE.IsLeftSemi _JOIN_TYPE.IsSetOp}}
+			if bg.helper.builderState.numEmittedTotal == bg.helper.numRightTuples {
+				// For INTERSECT ALL joins we build min(# left tuples, # right
+				// tuples), and we have already reached the number of tuples
+				// from the right. Thus, we have to skip all tuples from the
+				// left that are part of the buffered group since they don't
+				// have a match.
+				skipLeftBufferedGroup = true
+			}
 			// {{end}}
+			if skipLeftBufferedGroup {
+				// Keep fetching the next batch from the left input until we
+				// either find the start of the new group or we exhaust the
+				// input.
+				for o.proberState.lIdx == o.proberState.lLength && o.proberState.lLength > 0 {
+					o.continueLeftBufferedGroup()
+				}
+				return true
+			}
+			// Fetch the next batch from the left input and calculate the
+			// boundaries of the buffered group.
+			o.continueLeftBufferedGroup()
+			if o.proberState.lIdx == 0 {
+				return true
+			}
+			bg.helper.prepareForNextLeftBatch(
+				o.proberState.lBatch, bg.leftGroupStartIdx, o.proberState.lIdx,
+			)
 		}
 
-	case mjBuildFromBufferedGroup:
-		willEmit := o.builderState.totalOutCountFromBufferedGroup - o.builderState.alreadyEmittedFromBufferedGroup
-		if o.builderState.outCount+willEmit > o.output.Capacity() {
-			willEmit = o.output.Capacity() - o.builderState.outCount
+		willEmit := bg.helper.canEmit()
+		if o.builderState.outCount+willEmit > o.outputCapacity {
+			willEmit = o.outputCapacity - o.builderState.outCount
 		} else {
-			o.builderState.outFinished = true
+			bg.leftBatchDone = true
 		}
-		o.builderState.outCount += willEmit
-		o.builderState.alreadyEmittedFromBufferedGroup += willEmit
-		if o.output.Width() != 0 && willEmit > 0 {
+		if willEmit > 0 && len(o.outputTypes) != 0 {
 			// {{if not (or _JOIN_TYPE.IsRightSemi _JOIN_TYPE.IsRightAnti)}}
-			o.bufferedGroup.helper.buildFromLeftInput(o.Ctx, outStartIdx)
+			bg.helper.buildFromLeftInput(o.Ctx, o.builderState.outCount)
 			// {{end}}
 			// {{if not (or _JOIN_TYPE.IsLeftSemi _JOIN_TYPE.IsLeftAnti)}}
-			o.bufferedGroup.helper.buildFromRightInput(o.Ctx, outStartIdx)
+			bg.helper.buildFromRightInput(o.Ctx, o.builderState.outCount)
 			// {{end}}
 		}
-
-	default:
-		colexecerror.InternalError(errors.AssertionFailedf("unsupported mjBuildFrom %d", o.builderState.buildFrom))
-
+		o.builderState.outCount += willEmit
+		bg.helper.builderState.numEmittedCurLeftBatch += willEmit
+		bg.helper.builderState.numEmittedTotal += willEmit
+		if o.builderState.outCount == o.outputCapacity {
+			return false
+		}
 	}
 }
 
@@ -1167,18 +1280,8 @@ func (o *mergeJoin_JOIN_TYPE_STRINGOp) build() {
 // builder.
 func _SOURCE_FINISHED_SWITCH(_JOIN_TYPE joinTypeInfo) { // */}}
 	// {{define "sourceFinishedSwitch" -}}
-	o.outputReady = true
-	o.builderState.buildFrom = mjBuildFromBatch
-	// {{if or $.JoinType.IsInner (or $.JoinType.IsLeftSemi $.JoinType.IsRightSemi)}}
-	o.setBuilderSourceToBufferedGroup()
-	// {{else}}
-	// Next, we need to make sure that builder state is set up for a case when
-	// neither exhaustLeftSource nor exhaustRightSource is called below. In such
-	// scenario the merge joiner is done, so it'll be outputting zero-length
-	// batches from now on.
 	o.builderState.lGroups = o.builderState.lGroups[:0]
 	o.builderState.rGroups = o.builderState.rGroups[:0]
-	// {{end}}
 	// {{if or $.JoinType.IsLeftOuter $.JoinType.IsLeftAnti}}
 	// At least one of the sources is finished. If it was the right one,
 	// then we need to emit remaining tuples from the left source with
@@ -1186,10 +1289,6 @@ func _SOURCE_FINISHED_SWITCH(_JOIN_TYPE joinTypeInfo) { // */}}
 	// finished, then there is nothing left to do.
 	if o.proberState.lIdx < o.proberState.lLength {
 		o.exhaustLeftSource()
-		// We unset o.outputReady here because we want to put as many unmatched
-		// tuples from the left into the output batch. Once outCount reaches the
-		// desired output batch size, the output will be returned.
-		o.outputReady = false
 	}
 	// {{end}}
 	// {{if or $.JoinType.IsRightOuter $.JoinType.IsRightAnti}}
@@ -1199,10 +1298,6 @@ func _SOURCE_FINISHED_SWITCH(_JOIN_TYPE joinTypeInfo) { // */}}
 	// finished, then there is nothing left to do.
 	if o.proberState.rIdx < o.proberState.rLength {
 		o.exhaustRightSource()
-		// We unset o.outputReady here because we want to put as many unmatched
-		// tuples from the right into the output batch. Once outCount reaches the
-		// desired output batch size, the output will be returned.
-		o.outputReady = false
 	}
 	// {{end}}
 	// {{end}}
@@ -1215,63 +1310,74 @@ func (o *mergeJoin_JOIN_TYPE_STRINGOp) Next() coldata.Batch {
 	o.output, _ = o.unlimitedAllocator.ResetMaybeReallocate(
 		o.outputTypes, o.output, 1 /* minDesiredCapacity */, o.memoryLimit,
 	)
+	o.outputCapacity = o.output.Capacity()
 	o.bufferedGroup.helper.output = o.output
+	o.builderState.outCount = 0
 	for {
 		switch o.state {
 		case mjEntry:
-			o.initProberState()
-
-			if o.nonEmptyBufferedGroup() {
-				o.state = mjFinishBufferedGroup
-				break
+			// If this is the first batch or we're done with the current batch,
+			// get the next batch.
+			if o.proberState.lBatch == nil || (o.proberState.lLength != 0 && o.proberState.lIdx == o.proberState.lLength) {
+				o.proberState.lIdx, o.proberState.lBatch = 0, o.left.source.Next()
+				o.proberState.lLength = o.proberState.lBatch.Length()
+			}
+			if o.proberState.rBatch == nil || (o.proberState.rLength != 0 && o.proberState.rIdx == o.proberState.rLength) {
+				o.proberState.rIdx, o.proberState.rBatch = 0, o.right.source.Next()
+				o.proberState.rLength = o.proberState.rBatch.Length()
 			}
-
 			if o.sourceFinished() {
 				o.state = mjSourceFinished
 				break
 			}
-
 			o.state = mjProbe
+
 		case mjSourceFinished:
 			_SOURCE_FINISHED_SWITCH(_JOIN_TYPE)
-			o.state = mjBuild
-		case mjFinishBufferedGroup:
-			o.finishProbe()
-			o.setBuilderSourceToBufferedGroup()
-			o.state = mjBuild
+			if len(o.builderState.lGroups) == 0 && len(o.builderState.rGroups) == 0 {
+				o.state = mjDone
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
+			}
+			o.state = mjBuildFromBatch
+
 		case mjProbe:
 			o.probe()
-			o.setBuilderSourceToBatch()
-			o.state = mjBuild
-		case mjBuild:
-			o.build()
-
-			if o.builderState.outFinished {
+			o.builderState.lGroups, o.builderState.rGroups = o.groups.getGroups()
+			if len(o.builderState.lGroups) > 0 || len(o.builderState.rGroups) > 0 {
+				o.state = mjBuildFromBatch
+			} else if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
 				o.state = mjEntry
-				o.builderState.outFinished = false
 			}
 
-			if o.outputReady || o.builderState.outCount == o.output.Capacity() {
-				if o.builderState.outCount == 0 {
-					// We have already fully emitted the result of the join, so we
-					// transition to "finished" state.
-					o.state = mjDone
-					continue
-				}
+		case mjBuildFromBatch:
+			o.buildFromBatch()
+			if o.builderState.outCount == o.outputCapacity {
 				o.output.SetLength(o.builderState.outCount)
-				// Reset builder out count.
-				o.builderState.outCount = 0
-				o.outputReady = false
 				return o.output
 			}
-		case mjDone:
-			// Note that resetting of buffered group will close disk queues
-			// (if there are any).
-			if o.bufferedGroup.needToReset {
+			if o.bufferedGroup.helper.numRightTuples != 0 {
+				o.transitionIntoBuildingFromBufferedGroup()
+			} else {
+				o.state = mjEntry
+			}
+
+		case mjBuildFromBufferedGroup:
+			bufferedGroupComplete := o.buildFromBufferedGroup()
+			if bufferedGroupComplete {
 				o.bufferedGroup.helper.Reset(o.Ctx)
-				o.bufferedGroup.needToReset = false
+				o.state = mjEntry
+			}
+			if o.builderState.outCount == o.outputCapacity {
+				o.output.SetLength(o.builderState.outCount)
+				return o.output
 			}
+
+		case mjDone:
 			return coldata.ZeroBatch
+
 		default:
 			colexecerror.InternalError(errors.AssertionFailedf("unexpected merge joiner state in Next: %v", o.state))
 		}
diff --git a/pkg/sql/colexec/colexecjoin/mergejoiner_util.go b/pkg/sql/colexec/colexecjoin/mergejoiner_util.go
index 74df84023034..10dae4754e7f 100644
--- a/pkg/sql/colexec/colexecjoin/mergejoiner_util.go
+++ b/pkg/sql/colexec/colexecjoin/mergejoiner_util.go
@@ -148,6 +148,12 @@ func (b *circularGroupsBuffer) isLastGroupInCol() bool {
 	return b.startIdx == b.nextColStartIdx
 }
 
+// hasGroupForNextCol returns true if at least one group has already been added
+// for the next column.
+func (b *circularGroupsBuffer) hasGroupForNextCol() bool {
+	return b.nextColStartIdx != b.endIdx
+}
+
 // ensureCapacityForNewGroup makes sure that groups slices have enough space to
 // add another group to the buffer, reallocating the slices if necessary.
 func (b *circularGroupsBuffer) ensureCapacityForNewGroup() {
diff --git a/pkg/sql/colexec/crossjoiner_test.go b/pkg/sql/colexec/crossjoiner_test.go
index 05eefdd2a6e4..b138175c9a0e 100644
--- a/pkg/sql/colexec/crossjoiner_test.go
+++ b/pkg/sql/colexec/crossjoiner_test.go
@@ -190,7 +190,7 @@ func getCJTestCases() []*joinTestCase {
 			leftTypes:   []*types.T{types.Int},
 			rightTypes:  []*types.T{types.Int},
 			leftTuples:  colexectestutils.Tuples{{0}, {1}, {2}},
-			rightTuples: colexectestutils.Tuples{{3}},
+			rightTuples: colexectestutils.Tuples{{3}, {4}},
 			leftOutCols: []uint32{0},
 			joinType:    descpb.LeftSemiJoin,
 			expected:    colexectestutils.Tuples{{0}, {1}, {2}},
@@ -254,7 +254,7 @@ func getCJTestCases() []*joinTestCase {
 			expected:    colexectestutils.Tuples{{0}, {1}, {2}, {3}},
 		},
 		{
-			description: "intersect all join, right non-empty",
+			description: "intersect all join, right smaller",
 			leftTypes:   []*types.T{types.Int},
 			rightTypes:  []*types.T{types.Int},
 			leftTuples:  colexectestutils.Tuples{{0}, {1}, {2}, {3}, {4}},
@@ -263,6 +263,16 @@ func getCJTestCases() []*joinTestCase {
 			joinType:    descpb.IntersectAllJoin,
 			expected:    colexectestutils.Tuples{{0}, {1}, {2}},
 		},
+		{
+			description: "intersect all join, right larger",
+			leftTypes:   []*types.T{types.Int},
+			rightTypes:  []*types.T{types.Int},
+			leftTuples:  colexectestutils.Tuples{{0}, {1}, {2}},
+			rightTuples: colexectestutils.Tuples{{3}, {nil}, {3}, {3}, {4}},
+			leftOutCols: []uint32{0},
+			joinType:    descpb.IntersectAllJoin,
+			expected:    colexectestutils.Tuples{{0}, {1}, {2}},
+		},
 		{
 			description: "intersect all join, left empty",
 			leftTypes:   []*types.T{types.Int},
@@ -288,14 +298,26 @@ func getCJTestCases() []*joinTestCase {
 			expected:              colexectestutils.Tuples{},
 		},
 		{
-			description: "except all join, right non-empty",
+			description: "except all join, right smaller",
 			leftTypes:   []*types.T{types.Int},
 			rightTypes:  []*types.T{types.Int},
 			leftTuples:  colexectestutils.Tuples{{0}, {1}, {2}, {3}, {4}},
 			rightTuples: colexectestutils.Tuples{{3}, {nil}, {3}},
 			leftOutCols: []uint32{0},
 			joinType:    descpb.ExceptAllJoin,
-			expected:    colexectestutils.Tuples{{0}, {1}},
+			expected:    colexectestutils.Tuples{{3}, {4}},
+		},
+		{
+			description: "except all join, right larger",
+			leftTypes:   []*types.T{types.Int},
+			rightTypes:  []*types.T{types.Int},
+			leftTuples:  colexectestutils.Tuples{{0}, {1}, {2}},
+			rightTuples: colexectestutils.Tuples{{3}, {nil}, {3}, {3}, {4}},
+			leftOutCols: []uint32{0},
+			joinType:    descpb.ExceptAllJoin,
+			// Injecting nulls into the right input won't change the output.
+			skipAllNullsInjection: true,
+			expected:              colexectestutils.Tuples{},
 		},
 		{
 			description: "except all join, left empty",
diff --git a/pkg/sql/colexec/mergejoiner_test.go b/pkg/sql/colexec/mergejoiner_test.go
index 77006a79d576..40780fa33c93 100644
--- a/pkg/sql/colexec/mergejoiner_test.go
+++ b/pkg/sql/colexec/mergejoiner_test.go
@@ -1612,6 +1612,46 @@ func getMJTestCases() []*joinTestCase {
 			rightEqColsAreKey: true,
 			expected:          colexectestutils.Tuples{{1}},
 		},
+		{
+			description: "LEFT SEMI join with non-unique eq column",
+			joinType:    descpb.LeftSemiJoin,
+			leftTypes:   []*types.T{types.Int, types.Int},
+			rightTypes:  []*types.T{types.Int, types.Int},
+			leftTuples:  colexectestutils.Tuples{{nil, 4}, {nil, 2}, {0, nil}, {0, 3}, {0, nil}, {1, nil}, {3, 3}, {3, nil}, {3, 0}, {4, 0}},
+			rightTuples: colexectestutils.Tuples{{1, nil}, {nil, nil}, {nil, 0}, {3, 1}, {3, 1}, {1, 1}, {nil, 2}, {2, 2}, {3, 3}, {2, 4}},
+			leftEqCols:  []uint32{0},
+			rightEqCols: []uint32{1},
+			leftOutCols: []uint32{0, 1},
+			expected:    colexectestutils.Tuples{{0, nil}, {0, 3}, {0, nil}, {1, nil}, {3, 3}, {3, nil}, {3, 0}, {4, 0}},
+		},
+		{
+			description:     "FULL OUTER join with nulls and DESC",
+			joinType:        descpb.FullOuterJoin,
+			leftTypes:       []*types.T{types.Int, types.Int},
+			rightTypes:      []*types.T{types.Int, types.Int},
+			leftTuples:      colexectestutils.Tuples{{0, 4}, {nil, 0}, {3, 0}},
+			rightTuples:     colexectestutils.Tuples{{0, 1}, {nil, 0}, {4, nil}},
+			leftOutCols:     []uint32{0, 1},
+			rightOutCols:    []uint32{0, 1},
+			leftEqCols:      []uint32{1},
+			rightEqCols:     []uint32{1},
+			leftDirections:  []execinfrapb.Ordering_Column_Direction{execinfrapb.Ordering_Column_DESC},
+			rightDirections: []execinfrapb.Ordering_Column_Direction{execinfrapb.Ordering_Column_DESC},
+			expected:        colexectestutils.Tuples{{0, 4, nil, nil}, {nil, nil, 0, 1}, {nil, 0, nil, 0}, {3, 0, nil, 0}, {nil, nil, 4, nil}},
+		},
+		{
+			description:  "RIGHT OUTER join with a single match in the middle",
+			joinType:     descpb.RightOuterJoin,
+			leftTypes:    []*types.T{types.Int, types.Int},
+			rightTypes:   []*types.T{types.Int, types.Int},
+			leftTuples:   colexectestutils.Tuples{{nil, 4}, {nil, 4}, {2, nil}, {2, nil}, {2, 4}, {3, 0}},
+			rightTuples:  colexectestutils.Tuples{{nil, 1}, {1, nil}, {1, 0}, {2, 4}, {3, nil}, {3, 2}},
+			leftOutCols:  []uint32{0, 1},
+			rightOutCols: []uint32{0, 1},
+			leftEqCols:   []uint32{0, 1},
+			rightEqCols:  []uint32{0, 1},
+			expected:     colexectestutils.Tuples{{nil, nil, nil, 1}, {nil, nil, 1, nil}, {nil, nil, 1, 0}, {2, 4, 2, 4}, {nil, nil, 3, nil}, {nil, nil, 3, 2}},
+		},
 	}
 	return withMirrors(mjTestCases)
 }
diff --git a/pkg/sql/logictest/testdata/logic_test/cross_join b/pkg/sql/logictest/testdata/logic_test/cross_join
new file mode 100644
index 000000000000..919b74bcfc31
--- /dev/null
+++ b/pkg/sql/logictest/testdata/logic_test/cross_join
@@ -0,0 +1,84 @@
+# Check that the cross join is performed in a streaming fashion with regards to
+# the left input (if it wasn't, the query below would hit the temporary disk
+# storage limit in fakedist-disk config).
+
+statement ok
+CREATE TABLE t (
+    i2    INT2,
+    i4    INT4,
+    i8    INT8,
+    f4    FLOAT4,
+    f8    FLOAT8,
+    s     STRING,
+    c     CHAR,
+    b     BYTES,
+    dc    DECIMAL,
+    ival  INTERVAL,
+    oid   OID,
+    tstz  TIMESTAMPTZ,
+    ts    TIMESTAMP,
+    da    DATE,
+    inet  INET,
+    vb    VARBIT
+);
+INSERT
+  INTO t
+SELECT i::INT2,
+       i::INT4,
+       i::INT8,
+       i::FLOAT4,
+       i::FLOAT8,
+       i::STRING,
+       i::CHAR,
+       i::STRING::BYTES,
+       i::DECIMAL,
+       i::INTERVAL,
+       i::OID,
+       i::TIMESTAMPTZ,
+       i::TIMESTAMP,
+       i::DATE,
+       ('127.0.0.' || i::STRING)::INET,
+       i::VARBIT
+  FROM (
+         SELECT i FROM generate_series(1, 2) as t(i)
+         UNION ALL SELECT NULL
+       );
+SELECT *
+  FROM (
+        SELECT a.i2,
+               b.i4,
+               c.i8,
+               d.f4,
+               e.f8,
+               f.s,
+               g.c,
+               h.b,
+               i.dc,
+               j.ival,
+               k.oid,
+               l.tstz,
+               m.ts,
+               n.da,
+               o.inet,
+               p.vb,
+               random() AS r
+          FROM t AS a,
+               t AS b,
+               t AS c,
+               t AS d,
+               t AS e,
+               t AS f,
+               t AS g,
+               t AS h,
+               t AS i,
+               t AS j,
+               t AS k,
+               t AS l,
+               t AS m,
+               t AS n,
+               t AS o,
+               t AS p
+       )
+ WHERE r < .01
+ LIMIT 1
+
diff --git a/pkg/sql/opt/exec/execbuilder/testdata/vectorize_local b/pkg/sql/opt/exec/execbuilder/testdata/vectorize_local
index 0cc75705a171..aba3ce5f0de5 100644
--- a/pkg/sql/opt/exec/execbuilder/testdata/vectorize_local
+++ b/pkg/sql/opt/exec/execbuilder/testdata/vectorize_local
@@ -447,3 +447,28 @@ EXPLAIN (VEC) DELETE FROM t70438 WHERE k=3 OR v=6
       └ *colexec.SerialUnorderedSynchronizer
         ├ *colfetcher.ColBatchScan
         └ *colfetcher.ColBatchScan
+
+# Some tests for set-op cross joins.
+statement ok
+CREATE TABLE t ();
+CREATE TABLE u ();
+INSERT INTO t (rowid) VALUES (1), (2);
+INSERT INTO u (rowid) VALUES (1);
+
+query T
+EXPLAIN (VEC) SELECT * FROM t INTERSECT ALL SELECT * FROM u
+----
+│
+└ Node 1
+  └ *colexecjoin.crossJoiner
+    ├ *colfetcher.ColBatchScan
+    └ *colfetcher.ColBatchScan
+
+query T
+EXPLAIN (VEC) SELECT * FROM t EXCEPT ALL SELECT * FROM u
+----
+│
+└ Node 1
+  └ *colexecjoin.crossJoiner
+    ├ *colfetcher.ColBatchScan
+    └ *colfetcher.ColBatchScan