Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
PARQUET-34: Make Contains() a composable operator
Browse files Browse the repository at this point in the history
clairemcginty committed Apr 28, 2024

Unverified

This user has not yet uploaded their public signing key.
1 parent 6ccaab1 commit a597fcf
Showing 20 changed files with 517 additions and 358 deletions.
Original file line number Diff line number Diff line change
@@ -21,12 +21,13 @@
import java.io.Serializable;
import java.util.Set;
import org.apache.parquet.filter2.predicate.Operators.And;
import org.apache.parquet.filter2.predicate.Operators.ArrayColumn;
import org.apache.parquet.filter2.predicate.Operators.BinaryColumn;
import org.apache.parquet.filter2.predicate.Operators.BooleanColumn;
import org.apache.parquet.filter2.predicate.Operators.Column;
import org.apache.parquet.filter2.predicate.Operators.Contains;
import org.apache.parquet.filter2.predicate.Operators.DoesNotContain;
import org.apache.parquet.filter2.predicate.Operators.ContainsAnd;
import org.apache.parquet.filter2.predicate.Operators.ContainsEq;
import org.apache.parquet.filter2.predicate.Operators.ContainsOr;
import org.apache.parquet.filter2.predicate.Operators.ContainsPredicate;
import org.apache.parquet.filter2.predicate.Operators.DoubleColumn;
import org.apache.parquet.filter2.predicate.Operators.Eq;
import org.apache.parquet.filter2.predicate.Operators.FloatColumn;
@@ -41,7 +42,6 @@
import org.apache.parquet.filter2.predicate.Operators.NotEq;
import org.apache.parquet.filter2.predicate.Operators.NotIn;
import org.apache.parquet.filter2.predicate.Operators.Or;
import org.apache.parquet.filter2.predicate.Operators.SupportsContains;
import org.apache.parquet.filter2.predicate.Operators.SupportsEqNotEq;
import org.apache.parquet.filter2.predicate.Operators.SupportsLtGt;
import org.apache.parquet.filter2.predicate.Operators.UserDefined;
@@ -95,11 +95,6 @@ public static BinaryColumn binaryColumn(String columnPath) {
return new BinaryColumn(ColumnPath.fromDotString(columnPath));
}

/**
 * Wraps the given element column in a new {@code ArrayColumn}.
 *
 * @param elementColumn the column handed to the {@code ArrayColumn} constructor; its type must
 *     both extend {@code Column<T>} and implement {@code SupportsEqNotEq}
 * @param <T> the comparable value type carried by the element column
 * @param <C> the concrete type of the element column
 * @return a freshly constructed {@code ArrayColumn<T>} built from {@code elementColumn}
 */
public static <T extends Comparable<T>, C extends Column<T> & SupportsEqNotEq> ArrayColumn<T> arrayColumn(
    C elementColumn) {
  final ArrayColumn<T> wrapped = new ArrayColumn<>(elementColumn);
  return wrapped;
}

/**
* Keeps records if their value is equal to the provided value.
* Nulls are treated the same way the Java programming language does.
@@ -266,14 +261,18 @@ public static <T extends Comparable<T>, C extends Column<T> & SupportsEqNotEq> N
return new NotIn<>(column, values);
}

public static <T extends Comparable<T>, C extends Column<T> & SupportsContains> Contains<T> contains(
C column, T value) {
return new Contains<>(column, value);
/**
 * Creates a {@code ContainsEq} predicate from a column and a value.
 *
 * @param column the column passed as the first constructor argument of {@code ContainsEq}
 * @param value the value passed as the second constructor argument of {@code ContainsEq}
 * @param <T> the comparable value type of the column
 * @param <C> the concrete column type
 * @return a new {@code ContainsEq<T>} built from {@code column} and {@code value}
 */
public static <T extends Comparable<T>, C extends Column<T>> ContainsEq<T> containsEq(C column, T value) {
  final ContainsEq<T> predicate = new ContainsEq<>(column, value);
  return predicate;
}

/**
 * Combines two {@code ContainsPredicate}s into a single {@code ContainsAnd}.
 *
 * <p>The type parameter {@code C} is declared but not referenced by the parameter list; it is
 * kept here so the generic signature stays unchanged.
 *
 * @param left the first operand handed to the {@code ContainsAnd} constructor
 * @param right the second operand handed to the {@code ContainsAnd} constructor
 * @param <T> the comparable value type shared by both operands
 * @param <C> unused column type parameter
 * @return a new {@code ContainsAnd<T>} over {@code left} and {@code right}
 */
public static <T extends Comparable<T>, C extends Column<T>> ContainsAnd<T> containsAnd(
    ContainsPredicate<T> left, ContainsPredicate<T> right) {
  final ContainsAnd<T> conjunction = new ContainsAnd<>(left, right);
  return conjunction;
}

public static <T extends Comparable<T>, C extends Column<T> & SupportsContains> DoesNotContain<T> doesNotContain(
C column, T value) {
return new DoesNotContain<>(column, value);
/**
 * Combines two {@code ContainsPredicate}s into a single {@code ContainsOr}.
 *
 * <p>The type parameter {@code C} is declared but not referenced by the parameter list; it is
 * kept here so the generic signature stays unchanged.
 *
 * @param left the first operand handed to the {@code ContainsOr} constructor
 * @param right the second operand handed to the {@code ContainsOr} constructor
 * @param <T> the comparable value type shared by both operands
 * @param <C> unused column type parameter
 * @return a new {@code ContainsOr<T>} over {@code left} and {@code right}
 */
public static <T extends Comparable<T>, C extends Column<T>> ContainsOr<T> containsOr(
    ContainsPredicate<T> left, ContainsPredicate<T> right) {
  final ContainsOr<T> disjunction = new ContainsOr<>(left, right);
  return disjunction;
}

/**
Original file line number Diff line number Diff line change
@@ -19,8 +19,10 @@
package org.apache.parquet.filter2.predicate;

import org.apache.parquet.filter2.predicate.Operators.And;
import org.apache.parquet.filter2.predicate.Operators.Contains;
import org.apache.parquet.filter2.predicate.Operators.DoesNotContain;
import org.apache.parquet.filter2.predicate.Operators.ContainsAnd;
import org.apache.parquet.filter2.predicate.Operators.ContainsEq;
import org.apache.parquet.filter2.predicate.Operators.ContainsNotEq;
import org.apache.parquet.filter2.predicate.Operators.ContainsOr;
import org.apache.parquet.filter2.predicate.Operators.Eq;
import org.apache.parquet.filter2.predicate.Operators.Gt;
import org.apache.parquet.filter2.predicate.Operators.GtEq;
@@ -86,12 +88,20 @@ default <T extends Comparable<T>> R visit(NotIn<T> notIn) {
throw new UnsupportedOperationException("visit NotIn is not supported.");
}

default <T extends Comparable<T>> R visit(Contains<T> contains) {
throw new UnsupportedOperationException("visit Contains is not supported.");
// Default handler for ContainsEq: always rejects the predicate, so a visitor that
// wants to handle ContainsEq has to override this overload.
default <T extends Comparable<T>> R visit(ContainsEq<T> containsEq) {
  final String message = "visit ContainsEq is not supported.";
  throw new UnsupportedOperationException(message);
}

default <T extends Comparable<T>> R visit(DoesNotContain<T> doesNotContain) {
throw new UnsupportedOperationException("visit DoesNotContain is not supported.");
// Default handler for ContainsNotEq: always rejects the predicate, so a visitor that
// wants to handle ContainsNotEq has to override this overload.
// The message names ContainsNotEq (it previously said ContainsEq, a copy-paste slip
// that made failures point at the wrong overload).
default <T extends Comparable<T>> R visit(ContainsNotEq<T> containsNotEq) {
  throw new UnsupportedOperationException("visit ContainsNotEq is not supported.");
}

// Default handler for ContainsAnd: always rejects the predicate, so a visitor that
// wants to handle ContainsAnd has to override this overload.
default <T extends Comparable<T>> R visit(ContainsAnd<T> containsAnd) {
  final String message = "visit ContainsAnd is not supported.";
  throw new UnsupportedOperationException(message);
}

// Default handler for ContainsOr: always rejects the predicate, so a visitor that
// wants to handle ContainsOr has to override this overload.
default <T extends Comparable<T>> R visit(ContainsOr<T> containsOr) {
  final String message = "visit ContainsOr is not supported.";
  throw new UnsupportedOperationException(message);
}

// Handler for And predicates. Declared without a default body, so every
// implementation of this visitor must provide it.
R visit(And and);
Original file line number Diff line number Diff line change
@@ -24,8 +24,10 @@
import java.util.Objects;
import org.apache.parquet.filter2.predicate.FilterPredicate.Visitor;
import org.apache.parquet.filter2.predicate.Operators.And;
import org.apache.parquet.filter2.predicate.Operators.Contains;
import org.apache.parquet.filter2.predicate.Operators.DoesNotContain;
import org.apache.parquet.filter2.predicate.Operators.ContainsAnd;
import org.apache.parquet.filter2.predicate.Operators.ContainsEq;
import org.apache.parquet.filter2.predicate.Operators.ContainsNotEq;
import org.apache.parquet.filter2.predicate.Operators.ContainsOr;
import org.apache.parquet.filter2.predicate.Operators.Eq;
import org.apache.parquet.filter2.predicate.Operators.Gt;
import org.apache.parquet.filter2.predicate.Operators.GtEq;
@@ -101,13 +103,23 @@ public <T extends Comparable<T>> FilterPredicate visit(NotIn<T> notIn) {
}

@Override
public <T extends Comparable<T>> FilterPredicate visit(Contains<T> contains) {
return contains;
// ContainsEq passes through this visitor untouched: the same instance is returned.
public <T extends Comparable<T>> FilterPredicate visit(ContainsEq<T> containsEq) {
  final FilterPredicate unchanged = containsEq;
  return unchanged;
}

@Override
public <T extends Comparable<T>> FilterPredicate visit(DoesNotContain<T> doesNotContain) {
return doesNotContain;
// ContainsNotEq passes through this visitor untouched: the same instance is returned.
public <T extends Comparable<T>> FilterPredicate visit(ContainsNotEq<T> containsNotEq) {
  final FilterPredicate unchanged = containsNotEq;
  return unchanged;
}

/**
 * Leaves a {@code ContainsAnd} as it is.
 *
 * @param containsAnd the predicate being visited
 * @return the same {@code containsAnd} instance; its operands are not visited here
 */
@Override
public <T extends Comparable<T>> FilterPredicate visit(ContainsAnd<T> containsAnd) {
  final FilterPredicate unchanged = containsAnd;
  return unchanged;
}

/**
 * Leaves a {@code ContainsOr} as it is.
 *
 * @param containsOr the predicate being visited
 * @return the same {@code containsOr} instance; its operands are not visited here
 */
@Override
public <T extends Comparable<T>> FilterPredicate visit(ContainsOr<T> containsOr) {
  final FilterPredicate unchanged = containsOr;
  return unchanged;
}

@Override
Original file line number Diff line number Diff line change
@@ -21,8 +21,10 @@
import java.util.Objects;
import org.apache.parquet.filter2.predicate.FilterPredicate.Visitor;
import org.apache.parquet.filter2.predicate.Operators.And;
import org.apache.parquet.filter2.predicate.Operators.Contains;
import org.apache.parquet.filter2.predicate.Operators.DoesNotContain;
import org.apache.parquet.filter2.predicate.Operators.ContainsAnd;
import org.apache.parquet.filter2.predicate.Operators.ContainsEq;
import org.apache.parquet.filter2.predicate.Operators.ContainsNotEq;
import org.apache.parquet.filter2.predicate.Operators.ContainsOr;
import org.apache.parquet.filter2.predicate.Operators.Eq;
import org.apache.parquet.filter2.predicate.Operators.Gt;
import org.apache.parquet.filter2.predicate.Operators.GtEq;
@@ -95,13 +97,23 @@ public <T extends Comparable<T>> FilterPredicate visit(NotIn<T> notIn) {
}

@Override
public <T extends Comparable<T>> FilterPredicate visit(Contains<T> contains) {
return new DoesNotContain<>(contains.getColumn(), contains.getValue());
// No rewritten form is produced for ContainsEq in this visitor: the call always
// fails with UnsupportedOperationException.
public <T extends Comparable<T>> FilterPredicate visit(ContainsEq<T> contains) {
  final String reason = "DoesNotContain not supported yet";
  throw new UnsupportedOperationException(reason);
}

@Override
public <T extends Comparable<T>> FilterPredicate visit(DoesNotContain<T> doesNotContain) {
return new Contains<>(doesNotContain.getColumn(), doesNotContain.getValue());
// No rewritten form is produced for ContainsNotEq in this visitor: the call always
// fails with UnsupportedOperationException.
public <T extends Comparable<T>> FilterPredicate visit(ContainsNotEq<T> contains) {
  final String reason = "DoesNotContainNotEq not supported yet!";
  throw new UnsupportedOperationException(reason);
}

/**
 * Rewrites a {@code ContainsAnd} as a {@code ContainsOr} over the same two operands.
 *
 * <p>NOTE(review): the operands are passed through as-is. If this visitor is meant to produce
 * the logical negation of its input, swapping And for Or without also negating each operand is
 * not a De Morgan rewrite - confirm the intended semantics.
 *
 * @param containsAnd the predicate whose left and right operands are reused
 * @return a new {@code ContainsOr} built from {@code getLeft()} and {@code getRight()}
 */
@Override
public <T extends Comparable<T>> FilterPredicate visit(ContainsAnd<T> containsAnd) {
  final FilterPredicate swapped = new ContainsOr<>(containsAnd.getLeft(), containsAnd.getRight());
  return swapped;
}

/**
 * Rewrites a {@code ContainsOr} as a {@code ContainsAnd} over the same two operands.
 *
 * <p>NOTE(review): the operands are passed through as-is. If this visitor is meant to produce
 * the logical negation of its input, swapping Or for And without also negating each operand is
 * not a De Morgan rewrite - confirm the intended semantics.
 *
 * @param containsOr the predicate whose left and right operands are reused
 * @return a new {@code ContainsAnd} built from {@code getLeft()} and {@code getRight()}
 */
@Override
public <T extends Comparable<T>> FilterPredicate visit(ContainsOr<T> containsOr) {
  final FilterPredicate swapped = new ContainsAnd<>(containsOr.getLeft(), containsOr.getRight());
  return swapped;
}

@Override
Loading

0 comments on commit a597fcf

Please sign in to comment.