Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Java bindings for AST transform #8846

Merged
merged 7 commits into from
Jul 28, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion java/src/main/java/ai/rapids/cudf/MemoryCleaner.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -18,6 +18,7 @@

package ai.rapids.cudf;

import ai.rapids.cudf.ast.CompiledExpression;
import ai.rapids.cudf.nvcomp.BatchedLZ4Decompressor;
import ai.rapids.cudf.nvcomp.Decompressor;
import org.slf4j.Logger;
Expand Down Expand Up @@ -272,6 +273,10 @@ static void register(CuFileHandle handle, Cleaner cleaner) {
all.add(new CleanerWeakReference(handle, cleaner, collected, false));
}

/**
 * Track a compiled AST expression by adding a new {@code CleanerWeakReference} for it to
 * the {@code all} collection.
 * @param expr the compiled expression to track
 * @param cleaner the cleaner paired with the expression
 */
public static void register(CompiledExpression expr, Cleaner cleaner) {
  // NOTE(review): the trailing `false` matches the CuFileHandle registration; its meaning is
  // defined by CleanerWeakReference, which is not visible here -- confirm before changing.
  CleanerWeakReference tracked = new CleanerWeakReference(expr, cleaner, collected, false);
  all.add(tracked);
}

/**
* This is not 100% perfect and we can still run into situations where RMM buffers were not
* collected and this returns false because of thread race conditions. This is just a best effort.
Expand Down
2 changes: 1 addition & 1 deletion java/src/main/java/ai/rapids/cudf/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ ColumnVector[] getColumns() {
}

/** Return the native table view handle for this table */
long getNativeView() {
public long getNativeView() {
// Returns the stored nativeHandle value as-is; no copy or conversion happens here.
return nativeHandle;
}

Expand Down
60 changes: 60 additions & 0 deletions java/src/main/java/ai/rapids/cudf/ast/AstNode.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package ai.rapids.cudf.ast;

import java.nio.ByteBuffer;

/** Common parent type for every node that makes up an AST. */
abstract class AstNode {
  /**
   * The kinds of AST nodes that may be written into a serialized AST.
   * NOTE: This must be kept in sync with the `jni_serialized_node_type` in CompiledExpression.cpp!
   */
  protected enum NodeType {
    VALID_LITERAL(0),
    NULL_LITERAL(1),
    COLUMN_REFERENCE(2),
    UNARY_EXPRESSION(3),
    BINARY_EXPRESSION(4);

    /** Single-byte identifier written to the buffer for this node type. */
    private final byte nativeId;

    NodeType(int id) {
      final byte narrowed = (byte) id;
      // Catch an identifier that does not survive narrowing to a byte.
      assert narrowed == id;
      this.nativeId = narrowed;
    }

    /** Number of bytes this node type occupies when serialized. */
    int getSerializedSize() {
      return Byte.BYTES;
    }

    /**
     * Write this node type's identifier byte into the buffer.
     * @param bb buffer to receive the serialized data
     */
    void serialize(ByteBuffer bb) {
      bb.put(nativeId);
    }
  }

  /**
   * Compute how many bytes the serialized form of this node, including all of its
   * child nodes, will occupy.
   */
  abstract int getSerializedSize();

  /**
   * Write the serialized form of this node and all of its child nodes.
   * @param bb buffer to receive the serialized data
   */
  abstract void serialize(ByteBuffer bb);
}
144 changes: 144 additions & 0 deletions java/src/main/java/ai/rapids/cudf/ast/AstOperator.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package ai.rapids.cudf.ast;

import java.nio.ByteBuffer;
import java.util.EnumSet;

/**
 * Enumeration of AST operations that can appear in an expression.
 * Every operator is classified as either unary or binary, and serializes as a single byte
 * holding its native ID.
 * NOTE: This must be kept in sync with the `jni_to_ast_operator` code in CompiledExpression.cpp!
 */
public enum AstOperator {
  // Binary operators
  ADD(0),            // operator +
  SUB(1),            // operator -
  MUL(2),            // operator *
  DIV(3),            // operator / using common type of lhs and rhs
  TRUE_DIV(4),       // operator / after promoting type to floating point
  FLOOR_DIV(5),      // operator / after promoting to 64 bit floating point and then flooring the result
  MOD(6),            // operator %
  PYMOD(7),          // operator % but following python's sign rules for negatives
  POW(8),            // lhs ^ rhs
  EQUAL(9),          // operator ==
  NOT_EQUAL(10),     // operator !=
  LESS(11),          // operator <
  GREATER(12),       // operator >
  LESS_EQUAL(13),    // operator <=
  GREATER_EQUAL(14), // operator >=
  BITWISE_AND(15),   // operator &
  BITWISE_OR(16),    // operator |
  BITWISE_XOR(17),   // operator ^
  LOGICAL_AND(18),   // operator &&
  LOGICAL_OR(19),    // operator ||
  // Unary operators
  IDENTITY(20),      // Identity function
  SIN(21),           // Trigonometric sine
  COS(22),           // Trigonometric cosine
  TAN(23),           // Trigonometric tangent
  ARCSIN(24),        // Trigonometric sine inverse
  ARCCOS(25),        // Trigonometric cosine inverse
  ARCTAN(26),        // Trigonometric tangent inverse
  SINH(27),          // Hyperbolic sine
  COSH(28),          // Hyperbolic cosine
  TANH(29),          // Hyperbolic tangent
  ARCSINH(30),       // Hyperbolic sine inverse
  ARCCOSH(31),       // Hyperbolic cosine inverse
  ARCTANH(32),       // Hyperbolic tangent inverse
  EXP(33),           // Exponential (base e, Euler number)
  LOG(34),           // Natural Logarithm (base e)
  SQRT(35),          // Square-root (x^0.5)
  CBRT(36),          // Cube-root (x^(1.0/3))
  CEIL(37),          // Smallest integer value not less than arg
  FLOOR(38),         // Largest integer value not greater than arg
  ABS(39),           // Absolute value
  RINT(40),          // Rounds the floating-point argument arg to an integer value
  BIT_INVERT(41),    // Bitwise Not (~)
  NOT(42);           // Logical Not (!)

  /** Operators that take exactly one operand. */
  private static final EnumSet<AstOperator> unaryOps = EnumSet.of(
      IDENTITY,
      SIN,
      COS,
      TAN,
      ARCSIN,
      ARCCOS,
      ARCTAN,
      SINH,
      COSH,
      TANH,
      ARCSINH,
      ARCCOSH,
      ARCTANH,
      EXP,
      LOG,
      SQRT,
      CBRT,
      CEIL,
      FLOOR,
      ABS,
      RINT,
      BIT_INVERT,
      NOT);

  /** Operators that take exactly two operands. */
  private static final EnumSet<AstOperator> binaryOps = EnumSet.of(
      ADD,
      SUB,
      MUL,
      DIV,
      TRUE_DIV,
      FLOOR_DIV,
      MOD,
      PYMOD,
      POW,
      EQUAL,
      NOT_EQUAL,
      LESS,
      GREATER,
      LESS_EQUAL,
      GREATER_EQUAL,
      BITWISE_AND,
      BITWISE_OR,
      BITWISE_XOR,
      LOGICAL_AND,
      LOGICAL_OR);

  /** Single-byte identifier written to the buffer for this operator. */
  private final byte nativeId;

  AstOperator(int nativeId) {
    this.nativeId = (byte) nativeId;
    // Catch an identifier that does not survive narrowing to a byte.
    assert this.nativeId == nativeId;
  }

  /** Return true if this operator is in the set of unary operators. */
  boolean isUnaryOperator() {
    return unaryOps.contains(this);
  }

  /** Return true if this operator is in the set of binary operators. */
  boolean isBinaryOperator() {
    return binaryOps.contains(this);
  }

  /** Get the size in bytes to serialize this operator */
  int getSerializedSize() {
    return Byte.BYTES;
  }

  /**
   * Serialize this operator to the specified buffer.
   * @param bb buffer to receive the operator's identifier byte
   */
  void serialize(ByteBuffer bb) {
    bb.put(nativeId);
  }
}
51 changes: 51 additions & 0 deletions java/src/main/java/ai/rapids/cudf/ast/BinaryExpression.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package ai.rapids.cudf.ast;

import java.nio.ByteBuffer;

/** An expression node that applies a binary operator to a left and a right operand. */
public class BinaryExpression extends Expression {
  private final AstOperator op;
  private final AstNode leftInput;
  private final AstNode rightInput;

  /**
   * Build a binary expression.
   * @param op operator to apply; must be a binary operator
   * @param leftInput node providing the left operand
   * @param rightInput node providing the right operand
   * @throws IllegalArgumentException if {@code op} is not a binary operator
   */
  public BinaryExpression(AstOperator op, AstNode leftInput, AstNode rightInput) {
    final boolean acceptable = op.isBinaryOperator();
    if (!acceptable) {
      throw new IllegalArgumentException(op + " is not a binary operator");
    }
    this.leftInput = leftInput;
    this.rightInput = rightInput;
    this.op = op;
  }

  /** Sum of the sizes of the node type tag, the operator, and both operand subtrees. */
  @Override
  int getSerializedSize() {
    int total = NodeType.BINARY_EXPRESSION.getSerializedSize();
    total += op.getSerializedSize();
    total += leftInput.getSerializedSize();
    total += rightInput.getSerializedSize();
    return total;
  }

  /**
   * Write the node type tag, then the operator, then the left operand, then the right operand.
   * @param bb buffer to receive the serialized data
   */
  @Override
  void serialize(ByteBuffer bb) {
    // Header: node type followed by the operator.
    NodeType.BINARY_EXPRESSION.serialize(bb);
    op.serialize(bb);
    // Operands, left before right.
    leftInput.serialize(bb);
    rightInput.serialize(bb);
  }
}
51 changes: 51 additions & 0 deletions java/src/main/java/ai/rapids/cudf/ast/ColumnReference.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package ai.rapids.cudf.ast;

import java.nio.ByteBuffer;

/** An AST node that refers to a column of an input table by index. */
public final class ColumnReference extends AstNode {
  private final int columnIndex;
  private final TableReference tableSource;

  /**
   * Construct a column reference to either the only or leftmost input table.
   * @param columnIndex index of the referenced column
   */
  public ColumnReference(int columnIndex) {
    this(columnIndex, TableReference.LEFT);
  }

  /**
   * Construct a column reference to the specified column index in the specified table.
   * @param columnIndex index of the referenced column
   * @param tableSource which input table the column belongs to
   */
  public ColumnReference(int columnIndex, TableReference tableSource) {
    this.tableSource = tableSource;
    this.columnIndex = columnIndex;
  }

  /** Sum of the sizes of the node type tag, the table reference, and the int column index. */
  @Override
  int getSerializedSize() {
    final int typeBytes = NodeType.COLUMN_REFERENCE.getSerializedSize();
    final int tableBytes = tableSource.getSerializedSize();
    return typeBytes + tableBytes + Integer.BYTES;
  }

  /**
   * Write the node type tag, then the table reference, then the column index as an int.
   * @param bb buffer to receive the serialized data
   */
  @Override
  void serialize(ByteBuffer bb) {
    NodeType.COLUMN_REFERENCE.serialize(bb);
    tableSource.serialize(bb);
    bb.putInt(columnIndex);
  }
}
Loading