Combine linearizer and ast_plan #8900

Merged 15 commits on Aug 2, 2021
2 changes: 1 addition & 1 deletion conda/recipes/libcudf/meta.yaml
@@ -52,7 +52,7 @@ test:
- test -f $PREFIX/lib/libcudftestutil.a
- test -f $PREFIX/include/cudf/aggregation.hpp
- test -f $PREFIX/include/cudf/ast/transform.hpp
- - test -f $PREFIX/include/cudf/ast/detail/linearizer.hpp
+ - test -f $PREFIX/include/cudf/ast/detail/expression_parser.hpp
- test -f $PREFIX/include/cudf/ast/detail/operators.hpp
- test -f $PREFIX/include/cudf/ast/nodes.hpp
- test -f $PREFIX/include/cudf/ast/operators.hpp
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
@@ -151,7 +151,7 @@ add_library(cudf
src/aggregation/aggregation.cpp
src/aggregation/aggregation.cu
src/aggregation/result_cache.cpp
- src/ast/linearizer.cpp
+ src/ast/expression_parser.cpp
src/ast/transform.cu
src/binaryop/binaryop.cpp
src/binaryop/compiled/binary_ops.cu
339 changes: 339 additions & 0 deletions cpp/include/cudf/ast/detail/expression_parser.hpp
@@ -0,0 +1,339 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/ast/nodes.hpp>
#include <cudf/ast/operators.hpp>
#include <cudf/scalar/scalar_device_view.cuh>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/mr/device_memory_resource.hpp>

#include <thrust/optional.h>
#include <thrust/scan.h>

#include <cstdint>
#include <cstring>
#include <functional>
#include <numeric>
#include <tuple>
#include <vector>

namespace cudf {
namespace ast {
namespace detail {

/**
* @brief Enum defining data reference types used by a node.
*
* This enum is device-specific. For instance, intermediate data references are generated by the
* expression parsing (linearization) process but cannot be explicitly created by the user.
*/
enum class device_data_reference_type {
COLUMN, // A value in a table column
LITERAL, // A literal value
INTERMEDIATE // An internal temporary value
};

/**
* @brief A device data reference describes a source of data used by a node.
*
* This is a POD class used to create references describing data type and locations for consumption
* by the `row_evaluator`.
*/
struct alignas(8) device_data_reference {
device_data_reference(device_data_reference_type reference_type,
cudf::data_type data_type,
cudf::size_type data_index,
table_reference table_source);

device_data_reference(device_data_reference_type reference_type,
cudf::data_type data_type,
cudf::size_type data_index);

device_data_reference_type const reference_type; // Source of data
cudf::data_type const data_type; // Type of data
cudf::size_type const data_index; // The column index of a table, index of a
// literal, or index of an intermediate
table_reference const table_source;  // Left or right table (if applicable)

bool operator==(device_data_reference const& rhs) const
{
return std::tie(data_index, reference_type, table_source) ==
std::tie(rhs.data_index, rhs.reference_type, rhs.table_source);
}
};
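// Illustrative note (not part of the original header): a reference to the third column of the
// left table would carry {COLUMN, <column type>, 2, LEFT}; a LITERAL's data_index is the position
// of its scalar device view in the literals array; an INTERMEDIATE's data_index names its
// shared-memory storage slot.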

// Type trait for wrapping nullable types in a thrust::optional. Non-nullable
// types are returned as is.
template <typename T, bool has_nulls>
struct possibly_null_value;

template <typename T>
struct possibly_null_value<T, true> {
using type = thrust::optional<T>;
};

template <typename T>
struct possibly_null_value<T, false> {
using type = T;
};

template <typename T, bool has_nulls>
using possibly_null_value_t = typename possibly_null_value<T, has_nulls>::type;

// Type used for intermediate storage in expression evaluation.
template <bool has_nulls>
using IntermediateDataType = possibly_null_value_t<std::int64_t, has_nulls>;
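// Illustrative checks (not part of the original header), assuming <type_traits> is reachable via
// the includes above: with nulls the value is wrapped in thrust::optional, otherwise the raw type
// is passed through unchanged.
static_assert(std::is_same<possibly_null_value_t<int, true>, thrust::optional<int>>::value,
              "nullable values are stored as thrust::optional");
static_assert(std::is_same<possibly_null_value_t<int, false>, int>::value,
              "non-nullable values are stored directly");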

/**
* @brief A container of all device data required to evaluate an expression on tables.
*
* This struct should never be instantiated directly. It is created by the
* `expression_parser` on construction, and the resulting member is publicly accessible
* for passing to kernels for constructing an `expression_evaluator`.
*
*/
struct expression_device_view {
device_span<detail::device_data_reference const> data_references;
device_span<cudf::detail::fixed_width_scalar_device_view_base const> literals;
device_span<ast_operator const> operators;
device_span<cudf::size_type const> operator_source_indices;
cudf::size_type num_intermediates;
int shmem_per_thread;
};
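// Illustrative sketch (not part of the original header): a kernel launch would typically size its
// dynamic shared memory from this struct, e.g.
//   auto const shmem_bytes = device_expression_data.shmem_per_thread * block_size;
// where `block_size` and the launching code are hypothetical.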

/**
* @brief The expression_parser traverses an expression and converts it into a form suitable for
* execution on the device.
*
* This class is part of a "visitor" pattern with the `node` class.
*
* This class does pre-processing work on the host, validating operators and operand data types. It
* traverses downward from a root node in a depth-first fashion, capturing information about
* the nodes and constructing vectors of information that are later used by the device for
* evaluating the abstract syntax tree as a "linear" list of operators whose input dependencies are
* resolved into intermediate data storage in shared memory.
*/
class expression_parser {
public:
/**
* @brief Construct a new expression_parser object
*
* @param expr The expression to create an evaluable expression_parser for.
* @param left The left table used for evaluating the abstract syntax tree.
* @param right The right table used for evaluating the abstract syntax tree.
* @param has_nulls Whether any of the input columns may contain nulls.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the device data buffer.
*/
expression_parser(node const& expr,
cudf::table_view left,
cudf::table_view right,
bool has_nulls,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
: _left{left}, _right{right}, _node_count{0}, _intermediate_counter{}, _has_nulls(has_nulls)
{
expr.accept(*this);
move_to_device(stream, mr);
}

/**
* @brief Construct a new expression_parser object
*
* @param expr The expression to create an evaluable expression_parser for.
* @param table The table used for evaluating the abstract syntax tree.
* @param has_nulls Whether any of the input columns may contain nulls.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the device data buffer.
*/
expression_parser(node const& expr,
cudf::table_view table,
bool has_nulls,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
: expression_parser(expr, table, table, has_nulls, stream, mr)
{
}

/**
* @brief Get the root data type of the abstract syntax tree.
*
* @return cudf::data_type
*/
cudf::data_type output_type() const;

/**
* @brief Visit a literal node.
*
* @param expr Literal node.
* @return cudf::size_type Index of device data reference for the node.
*/
cudf::size_type visit(literal const& expr);

/**
* @brief Visit a column reference node.
*
* @param expr Column reference node.
* @return cudf::size_type Index of device data reference for the node.
*/
cudf::size_type visit(column_reference const& expr);

/**
* @brief Visit an expression node.
*
* @param expr Expression node.
* @return cudf::size_type Index of device data reference for the node.
*/
cudf::size_type visit(expression const& expr);

/**
* @brief Internal class used to track the utilization of intermediate storage locations.
*
* As nodes are being evaluated, they may generate "intermediate" data that is immediately
* consumed. Rather than manifesting this data in global memory, we can store intermediates of any
* fixed width type (up to 8 bytes) by placing them in shared memory. This class helps to track
* the number and indices of intermediate data in shared memory using a give-take model. Locations
* in shared memory can be "taken" and used for storage, "given back," and then later re-used.
* This aims to minimize the maximum amount of shared memory needed at any point during the
* evaluation.
*
*/
class intermediate_counter {
public:
intermediate_counter() : used_values(), max_used(0) {}
cudf::size_type take();
void give(cudf::size_type value);
cudf::size_type get_max_used() const { return max_used; }

private:
/**
* @brief Find the first missing value in a contiguous sequence of integers.
*
* From a sorted container of integers, find the first "missing" value.
* For example, {0, 1, 2, 4, 5} is missing 3, and {1, 2, 3} is missing 0.
* If there are no missing values, return the size of the container.
*
* @return cudf::size_type Smallest value not already in the container.
*/
cudf::size_type find_first_missing() const;

std::vector<cudf::size_type> used_values;
cudf::size_type max_used;
};

expression_device_view device_expression_data; ///< The collection of data required to evaluate
///< the expression on the device.

private:
/**
* @brief Helper function for adding components (operators, literals, etc.) to the AST plan.
*
* @tparam T The underlying type of the input `std::vector`
* @param[in] v The `std::vector` containing components (operators, literals, etc.).
* @param[in,out] sizes The `std::vector` containing the size of each data buffer.
* @param[in,out] data_pointers The `std::vector` containing pointers to each data buffer.
*/
template <typename T>
void extract_size_and_pointer(std::vector<T> const& v,
std::vector<cudf::size_type>& sizes,
std::vector<void const*>& data_pointers)
{
auto const data_size = sizeof(T) * v.size();
sizes.push_back(data_size);
data_pointers.push_back(v.data());
}

void move_to_device(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr)
{
std::vector<cudf::size_type> sizes;
std::vector<void const*> data_pointers;

extract_size_and_pointer(_data_references, sizes, data_pointers);
extract_size_and_pointer(_literals, sizes, data_pointers);
extract_size_and_pointer(_operators, sizes, data_pointers);
extract_size_and_pointer(_operator_source_indices, sizes, data_pointers);

// Create device buffer
auto const buffer_size = std::accumulate(sizes.cbegin(), sizes.cend(), 0);
auto buffer_offsets = std::vector<int>(sizes.size());
thrust::exclusive_scan(sizes.cbegin(), sizes.cend(), buffer_offsets.begin(), 0);

auto h_data_buffer = std::vector<char>(buffer_size);
for (unsigned int i = 0; i < data_pointers.size(); ++i) {
std::memcpy(h_data_buffer.data() + buffer_offsets[i], data_pointers[i], sizes[i]);
}

_device_data_buffer = rmm::device_buffer(h_data_buffer.data(), buffer_size, stream, mr);

stream.synchronize();

// Create device pointers to components of plan
auto device_data_buffer_ptr = static_cast<char const*>(_device_data_buffer.data());
device_expression_data.data_references = device_span<detail::device_data_reference const>(
reinterpret_cast<detail::device_data_reference const*>(device_data_buffer_ptr +
buffer_offsets[0]),
_data_references.size());
device_expression_data.literals =
device_span<cudf::detail::fixed_width_scalar_device_view_base const>(
reinterpret_cast<cudf::detail::fixed_width_scalar_device_view_base const*>(
device_data_buffer_ptr + buffer_offsets[1]),
_literals.size());
device_expression_data.operators = device_span<ast_operator const>(
reinterpret_cast<ast_operator const*>(device_data_buffer_ptr + buffer_offsets[2]),
_operators.size());
device_expression_data.operator_source_indices = device_span<cudf::size_type const>(
reinterpret_cast<cudf::size_type const*>(device_data_buffer_ptr + buffer_offsets[3]),
_operator_source_indices.size());
device_expression_data.num_intermediates = _intermediate_counter.get_max_used();
device_expression_data.shmem_per_thread = static_cast<int>(
(_has_nulls ? sizeof(IntermediateDataType<true>) : sizeof(IntermediateDataType<false>)) *
device_expression_data.num_intermediates);
}
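// Worked example for the packing above (illustrative, not part of the original header): if the
// four components serialize to {48, 16, 8, 12} bytes, the exclusive scan yields offsets
// {0, 48, 64, 72} and buffer_size == 84; each component is memcpy'd to its offset in the host
// staging buffer, which is then shipped to the device in a single allocation and copy.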

/**
* @brief Helper function for recursive traversal of expressions.
*
* When parsing an expression composed of subexpressions, all subexpressions
* must be evaluated before an operator can be applied to them. This method
* performs that recursive traversal (in conjunction with the
`expression_parser.visit` and `expression.accept` methods as necessary to
* descend deeper into an expression tree).
*
* @param operands The operands to visit.
*
* @return The indices of the operands stored in the data references.
*/
std::vector<cudf::size_type> visit_operands(
std::vector<std::reference_wrapper<node const>> operands);

/**
* @brief Add a data reference to the internal list.
*
* @param data_ref The data reference to add.
*
* @return The index of the added data reference in the internal data references list.
*/
cudf::size_type add_data_reference(detail::device_data_reference data_ref);

rmm::device_buffer
_device_data_buffer; ///< The device-side data buffer containing the plan information, which is
///< owned by this class and persists until it is destroyed.

cudf::table_view const& _left;
cudf::table_view const& _right;
cudf::size_type _node_count;
intermediate_counter _intermediate_counter;
bool _has_nulls;
std::vector<detail::device_data_reference> _data_references;
std::vector<ast_operator> _operators;
std::vector<cudf::size_type> _operator_source_indices;
std::vector<cudf::detail::fixed_width_scalar_device_view_base> _literals;
};
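// Illustrative sketch (not part of the original header): the give/take model of
// expression_parser::intermediate_counter traced for an expression shaped like (a + b) * (c + d).
// The function name is hypothetical; take() and give() are declared above and defined in
// src/ast/expression_parser.cpp.
inline cudf::size_type intermediate_counter_example()
{
  expression_parser::intermediate_counter counter;
  auto const lhs = counter.take();  // slot 0 holds the result of (a + b)
  auto const rhs = counter.take();  // slot 1 holds the result of (c + d)
  counter.give(lhs);                // the outer multiply consumes its inputs...
  counter.give(rhs);                // ...and releases both slots for reuse
  return counter.get_max_used();    // 2: the peak number of live intermediates
}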

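// Hedged host-side sketch (not part of the original header): how a caller might use the parser to
// size dynamic shared memory for a kernel launch. The function name and block_size parameter are
// hypothetical.
inline int required_shmem_per_block(node const& expr,
                                    cudf::table_view const& table,
                                    bool has_nulls,
                                    int block_size,
                                    rmm::cuda_stream_view stream,
                                    rmm::mr::device_memory_resource* mr)
{
  expression_parser const parser(expr, table, has_nulls, stream, mr);
  // shmem_per_thread already accounts for nullable vs. non-nullable intermediate widths.
  return parser.device_expression_data.shmem_per_thread * block_size;
}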
} // namespace detail

} // namespace ast

} // namespace cudf