Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-39982: [Java] Add RunEndEncodedVector #43888

Merged
merged 18 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,11 @@ public ColumnBinder visit(ArrowType.Union type) {
throw new UnsupportedOperationException("No column binder implemented for type " + type);
}

/**
 * Rejects run-end encoded columns: this visitor has no column binder for them.
 *
 * @param type the run-end encoded Arrow type being visited
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public ColumnBinder visit(ArrowType.RunEndEncoded type) {
  final String reason = "No column binder implemented for type " + type;
  throw new UnsupportedOperationException(reason);
}

@Override
public ColumnBinder visit(ArrowType.Map type) {
return new MapBinder((MapVector) vector);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,12 @@ public List<ArrowBuf> visit(ArrowType.Union type) {
}
}


lidavidm marked this conversation as resolved.
Show resolved Hide resolved
/**
 * Rejects run-end encoded types: buffer import is not implemented for them here.
 *
 * @param type the run-end encoded Arrow type being visited
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public List<ArrowBuf> visit(ArrowType.RunEndEncoded type) {
  final String reason = "Importing buffers for type: " + type;
  throw new UnsupportedOperationException(reason);
}

@Override
public List<ArrowBuf> visit(ArrowType.Map type) {
return Arrays.asList(maybeImportBitmap(type), importOffsets(type, MapVector.OFFSET_WIDTH));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,5 +281,11 @@ public Boolean visit(ArrowType.ListView type) {
public Boolean visit(ArrowType.LargeListView type) {
  // Binding is not available for large list views, so fail unconditionally.
  final String reason = "Binding is not yet supported for type " + type;
  throw new UnsupportedOperationException(reason);
}

/**
 * Rejects run-end encoded parameters: no Avatica parameter binder exists for them.
 *
 * @param type the run-end encoded Arrow type being visited
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public Boolean visit(ArrowType.RunEndEncoded type) {
  final String reason = "No Avatica parameter binder implemented for type " + type;
  throw new UnsupportedOperationException(reason);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -284,5 +284,11 @@ public AvaticaParameter visit(ArrowType.LargeListView type) {
throw new UnsupportedOperationException(
"AvaticaParameter not yet supported for type " + type);
}

/**
 * Rejects run-end encoded types: no {@code AvaticaParameter} can be produced for them.
 *
 * @param type the run-end encoded Arrow type being visited
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public AvaticaParameter visit(ArrowType.RunEndEncoded type) {
  final String reason = "No Avatica parameter binder implemented for type " + type;
  throw new UnsupportedOperationException(reason);
}
}
}
5 changes: 5 additions & 0 deletions java/vector/src/main/codegen/data/ArrowTypes.tdd
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,11 @@
name: "LargeListView",
fields: [],
complex: true
},
{
name: "RunEndEncoded",
fields: [],
complex: true
}
]
}
2 changes: 1 addition & 1 deletion java/vector/src/main/codegen/templates/UnionReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
@SuppressWarnings("unused")
public class UnionReader extends AbstractFieldReader {

private static final int NUM_SUPPORTED_TYPES = 50;
private static final int NUM_SUPPORTED_TYPES = 51;

private BaseReader[] readers = new BaseReader[NUM_SUPPORTED_TYPES];
public UnionVector data;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.apache.arrow.vector.types.pojo.ArrowType.LargeUtf8;
import org.apache.arrow.vector.types.pojo.ArrowType.Map;
import org.apache.arrow.vector.types.pojo.ArrowType.Null;
import org.apache.arrow.vector.types.pojo.ArrowType.RunEndEncoded;
import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
import org.apache.arrow.vector.types.pojo.ArrowType.Time;
import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
Expand Down Expand Up @@ -280,6 +281,11 @@ public TypeLayout visit(Interval type) {
public TypeLayout visit(Duration type) {
  // Durations use the fixed-width layout built around a data buffer created with dataBuffer(64).
  final BufferLayout values = BufferLayout.dataBuffer(64);
  return newFixedWidthTypeLayout(values);
}

/**
 * Describes the layout of a run-end encoded type as having no buffers of its own.
 *
 * @param type the run-end encoded Arrow type being visited
 * @return a layout built from an empty list of buffer layouts
 */
@Override
public TypeLayout visit(RunEndEncoded type) {
  final TypeLayout noBuffers = new TypeLayout(Collections.<BufferLayout>emptyList());
  return noBuffers;
}
});
return layout;
}
Expand Down Expand Up @@ -444,6 +450,11 @@ public Integer visit(Interval type) {
public Integer visit(Duration type) {
  // Durations report the shared fixed-width buffer count.
  final Integer bufferCount = FIXED_WIDTH_BUFFER_COUNT;
  return bufferCount;
}

/**
 * Reports the buffer count for a run-end encoded type.
 *
 * @param type the run-end encoded Arrow type being visited
 * @return always {@code 0}
 */
@Override
public Integer visit(RunEndEncoded type) {
  final int noBuffers = 0;
  return noBuffers;
}
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.arrow.vector.BaseVariableWidthViewVector;
import org.apache.arrow.vector.BitVector;
import org.apache.arrow.vector.ExtensionTypeVector;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.NullVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.complex.BaseLargeRepeatedValueViewVector;
Expand All @@ -41,11 +42,13 @@
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.ListViewVector;
import org.apache.arrow.vector.complex.NonNullableStructVector;
import org.apache.arrow.vector.complex.RunEndEncodedVector;
import org.apache.arrow.vector.complex.StructVector;
import org.apache.arrow.vector.complex.UnionVector;

/** Visitor to compare a range of values for vectors. */
public class RangeEqualsVisitor implements VectorVisitor<Boolean, Range> {

private ValueVector left;
private ValueVector right;

Expand Down Expand Up @@ -226,6 +229,14 @@ public Boolean visit(NullVector left, Range range) {
return true;
}

/**
 * Compares a range of a run-end encoded vector against the right-hand vector.
 *
 * @param left the left-hand run-end encoded vector
 * @param range the range to compare
 * @return {@code false} if validation of {@code left} fails, otherwise the result of the
 *     run-end encoded range comparison
 */
@Override
public Boolean visit(RunEndEncodedVector left, Range range) {
  // Short-circuit: the range comparison only runs when validation succeeds.
  return validate(left) && compareRunEndEncodedVectors(range);
}

@Override
public Boolean visit(ExtensionTypeVector<?> left, Range range) {
if (!(right instanceof ExtensionTypeVector<?>) || !validate(left)) {
Expand Down Expand Up @@ -255,6 +266,48 @@ public Boolean visit(LargeListViewVector left, Range range) {
return compareLargeListViewVectors(range);
}

protected boolean compareRunEndEncodedVectors(Range range) {
RunEndEncodedVector leftVector = (RunEndEncodedVector) left;
RunEndEncodedVector rightVector = (RunEndEncodedVector) right;

final int leftRangeEnd = range.getLeftStart() + range.getLength();
final int rightRangeEnd = range.getRightStart() + range.getLength();

FieldVector leftValuesVector = leftVector.getValuesVector();
FieldVector rightValuesVector = rightVector.getValuesVector();

RangeEqualsVisitor innerVisitor = createInnerVisitor(leftValuesVector, rightValuesVector, null);

int leftLogicalIndex = range.getLeftStart();
int rightLogicalIndex = range.getRightStart();

while (leftLogicalIndex < leftRangeEnd) {
// TODO: implement it more efficient
// https://github.com/apache/arrow/issues/44157
int leftPhysicalIndex = leftVector.getPhysicalIndex(leftLogicalIndex);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is going to do a binary search on every step? One thing we could consider for the future is an iterator that can avoid that

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for your suggestion, I'll try to do it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can file a new issue for it and leave a TODO here.

int rightPhysicalIndex = rightVector.getPhysicalIndex(rightLogicalIndex);
if (leftValuesVector.accept(
innerVisitor, new Range(leftPhysicalIndex, rightPhysicalIndex, 1))) {
int leftRunEnd = leftVector.getRunEnd(leftLogicalIndex);
int rightRunEnd = rightVector.getRunEnd(rightLogicalIndex);

int leftRunLength = Math.min(leftRunEnd, leftRangeEnd) - leftLogicalIndex;
int rightRunLength = Math.min(rightRunEnd, rightRangeEnd) - rightLogicalIndex;

if (leftRunLength != rightRunLength) {
return false;
} else {
leftLogicalIndex = leftRunEnd;
rightLogicalIndex = rightRunEnd;
}
} else {
return false;
}
}

return true;
}

protected RangeEqualsVisitor createInnerVisitor(
ValueVector leftInner,
ValueVector rightInner,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.ListViewVector;
import org.apache.arrow.vector.complex.NonNullableStructVector;
import org.apache.arrow.vector.complex.RunEndEncodedVector;
import org.apache.arrow.vector.complex.UnionVector;
import org.apache.arrow.vector.types.pojo.Field;

Expand Down Expand Up @@ -136,6 +137,11 @@ public Boolean visit(LargeListViewVector left, Void value) {
return compareField(left.getField(), right.getField());
}

/**
 * Type-compares a run-end encoded vector with the right-hand vector by comparing their fields.
 *
 * @param left the left-hand run-end encoded vector
 * @param value unused
 * @return the result of comparing the two vectors' fields
 */
@Override
public Boolean visit(RunEndEncodedVector left, Void value) {
  final Field leftField = left.getField();
  final Field rightField = right.getField();
  return compareField(leftField, rightField);
}

private boolean compareField(Field leftField, Field rightField) {

if (leftField == rightField) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.ListViewVector;
import org.apache.arrow.vector.complex.NonNullableStructVector;
import org.apache.arrow.vector.complex.RunEndEncodedVector;
import org.apache.arrow.vector.complex.UnionVector;

/**
Expand Down Expand Up @@ -71,4 +72,9 @@ default OUT visit(LargeListViewVector left, IN value) {
throw new UnsupportedOperationException(
"VectorVisitor for LargeListViewVector is not supported.");
}

/**
 * Default handler for run-end encoded vectors; visitors that support them must override this.
 *
 * @param left the run-end encoded vector being visited
 * @param value the visitor-specific input
 * @return never returns normally
 * @throws UnsupportedOperationException always, unless overridden
 */
default OUT visit(RunEndEncodedVector left, IN value) {
  // The message names the vector type actually being visited (it previously named
  // LargeListViewVector, copied from the method above).
  throw new UnsupportedOperationException(
      "VectorVisitor for RunEndEncodedVector is not supported.");
}
}
Loading
Loading