Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement apply() in FIL #5358

Merged
merged 29 commits into from
May 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
7199b04
Implement apply() in FIL
hcho3 Apr 12, 2023
5588285
Merge remote-tracking branch 'origin/branch-23.06' into predict_leaf_new
hcho3 Apr 12, 2023
361e1ab
Merge branch 'branch-23.06' into predict_leaf_new
hcho3 Apr 17, 2023
db1335a
Fix a subtle bug in Treelite model importer, which led to illegal mem…
hcho3 Apr 20, 2023
3d897d2
Some simple changes
hcho3 Apr 20, 2023
dddfe6d
Merge remote-tracking branch 'upstream/branch-23.06' into predict_lea…
hcho3 Apr 20, 2023
7d77d79
Merge branch 'branch-23.06' into predict_leaf_new
hcho3 Apr 21, 2023
5fd9f7f
Merge remote-tracking branch 'origin/branch-23.06' into predict_leaf_new
hcho3 Apr 27, 2023
4775918
Fix broken merge
hcho3 Apr 27, 2023
67ea706
C++ benchmark for CPU FIL
hcho3 Apr 27, 2023
47ec711
Roll back change to number of repetition
hcho3 Apr 27, 2023
5dd74ef
Merge remote-tracking branch 'origin/branch-23.06' into predict_leaf_new
hcho3 May 9, 2023
7e130e3
Revert "Roll back change to number of repetition"
hcho3 May 9, 2023
29871b8
Revert "C++ benchmark for CPU FIL"
hcho3 May 9, 2023
78d1e3b
Merge remote-tracking branch 'origin/branch-23.06' into predict_leaf_new
hcho3 May 11, 2023
e208c16
Revamp design
hcho3 May 11, 2023
535763e
Avoid unused variable warning
hcho3 May 11, 2023
ea3a55f
Merge remote-tracking branch 'origin/branch-23.06' into predict_leaf_new
hcho3 May 11, 2023
e611588
Merge branch 'branch-23.06' into predict_leaf_new
hcho3 May 11, 2023
7ae0118
Merge branch 'branch-23.06' into predict_leaf_new
hcho3 May 17, 2023
a35877c
Update HDBSCAN runner to work with latest RAFT
hcho3 May 18, 2023
0a572ef
Add template parameter predict_leaf
hcho3 May 19, 2023
94ff224
Merge branch 'branch-23.06' into predict_leaf_new
hcho3 May 19, 2023
9880dc0
Fix formatting
hcho3 May 19, 2023
17d44ed
Merge branch 'branch-23.06' into predict_leaf_new
hcho3 May 22, 2023
ae545bd
Address reviewer's feedback
hcho3 May 22, 2023
fd33d14
Merge branch 'branch-23.06' into predict_leaf_new
hcho3 May 24, 2023
00e5ae4
Avoid compiler warning about unused variable
hcho3 May 25, 2023
2c0627c
Merge branch 'branch-23.06' into predict_leaf_new
dantegd May 26, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions cpp/include/cuml/experimental/fil/decision_forest.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ struct decision_forest {
decision_forest()
: nodes_{},
root_node_indexes_{},
node_id_mapping_{},
vector_output_{},
categorical_storage_{},
num_features_{},
Expand All @@ -125,6 +126,8 @@ struct decision_forest {
* @param nodes A buffer containing all nodes within the forest
* @param root_node_indexes A buffer containing the index of the root node
* of every tree in the forest
* @param node_id_mapping Mapping to use to convert FIL's internal node ID into Treelite's node
* ID. Only relevant when predict_type == infer_kind::leaf_id
* @param num_features The number of features per input sample for this model
* @param num_outputs The number of outputs per row from this model
* @param has_categorical_nodes Whether this forest contains any
Expand Down Expand Up @@ -155,6 +158,7 @@ struct decision_forest {
*/
decision_forest(raft_proto::buffer<node_type>&& nodes,
raft_proto::buffer<index_type>&& root_node_indexes,
raft_proto::buffer<index_type>&& node_id_mapping,
index_type num_features,
index_type num_outputs = index_type{2},
bool has_categorical_nodes = false,
Expand All @@ -169,6 +173,7 @@ struct decision_forest {
io_type postproc_constant = io_type{1})
: nodes_{nodes},
root_node_indexes_{root_node_indexes},
node_id_mapping_{node_id_mapping},
vector_output_{vector_output},
categorical_storage_{categorical_storage},
num_features_{num_features},
Expand Down Expand Up @@ -207,6 +212,8 @@ struct decision_forest {
if (inference_kind == infer_kind::per_tree) {
result = num_trees();
if (has_vector_leaves()) { result *= num_outputs_; }
} else if (inference_kind == infer_kind::leaf_id) {
result = num_trees();
}
return result;
}
Expand All @@ -233,6 +240,8 @@ struct decision_forest {
* @param[in] predict_type Type of inference to perform. Defaults to summing
* the outputs of all trees and produce an output per row. If set to
* "per_tree", we will instead output all outputs of individual trees.
* If set to "leaf_id", we will output the integer ID of the leaf node
* for each tree.
* @param[in] specified_rows_per_block_iter If non-nullopt, this value is
* used to determine how many rows are evaluated for each inference
* iteration within a CUDA block. Runtime performance is quite sensitive
Expand Down Expand Up @@ -301,6 +310,8 @@ struct decision_forest {
raft_proto::buffer<node_type> nodes_;
/** The index of the root node for each tree in the forest */
raft_proto::buffer<index_type> root_node_indexes_;
/** Mapping to apply to node IDs. Only relevant when predict_type == infer_kind::leaf_id */
raft_proto::buffer<index_type> node_id_mapping_;
/** Buffer of outputs for all leaves in vector-leaf models */
std::optional<raft_proto::buffer<io_type>> vector_output_;
/** Buffer of elements used as backing data for bitsets which specify
Expand All @@ -323,6 +334,7 @@ struct decision_forest {
{
return forest_type{nodes_.data(),
root_node_indexes_.data(),
node_id_mapping_.data(),
static_cast<index_type>(root_node_indexes_.size()),
num_outputs_};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ struct decision_forest_builder {
if (cur_tree_size_ % alignment_ != index_type{}) {
auto padding = (alignment_ - cur_tree_size_ % alignment_);
for (auto i = index_type{}; i < padding; ++i) {
add_node(typename node_type::threshold_type{});
add_node(typename node_type::threshold_type{}, std::nullopt);
}
}
}
Expand All @@ -81,6 +81,7 @@ struct decision_forest_builder {
void add_categorical_node(
iter_t vec_begin,
iter_t vec_end,
std::optional<int> tl_node_id = std::nullopt,
bool default_to_distant_child = false,
typename node_type::metadata_storage_type feature = typename node_type::metadata_storage_type{},
typename node_type::offset_type offset = typename node_type::offset_type{})
Expand All @@ -100,12 +101,14 @@ struct decision_forest_builder {
auto set = bitset{set_storage, max_node_categories};
std::for_each(vec_begin, vec_end, [&set](auto&& cat_index) { set.set(cat_index); });

add_node(node_value, false, default_to_distant_child, true, feature, offset, false);
add_node(node_value, tl_node_id, false, default_to_distant_child, true, feature, offset, false);
}

/* Add a leaf node with vector output */
template <typename iter_t>
void add_leaf_vector_node(iter_t vec_begin, iter_t vec_end)
void add_leaf_vector_node(iter_t vec_begin,
iter_t vec_end,
std::optional<int> tl_node_id = std::nullopt)
{
auto leaf_index = typename node_type::index_type(vector_output_.size() / output_size_);
std::copy(vec_begin, vec_end, std::back_inserter(vector_output_));
Expand All @@ -115,13 +118,16 @@ struct decision_forest_builder {
false,
typename node_type::metadata_storage_type{},
typename node_type::offset_type{});
// 0 indicates the lack of ID mapping for a particular node
node_id_mapping_.push_back(static_cast<index_type>(tl_node_id.value_or(0)));
++cur_tree_size_;
}

/* Add a node to the model */
template <typename value_t>
void add_node(
value_t val,
std::optional<int> tl_node_id = std::nullopt,
bool is_leaf_node = true,
bool default_to_distant_child = false,
bool is_categorical_node = false,
Expand All @@ -132,6 +138,8 @@ struct decision_forest_builder {
if (is_inclusive) { val = std::nextafter(val, std::numeric_limits<value_t>::infinity()); }
nodes_.emplace_back(
val, is_leaf_node, default_to_distant_child, is_categorical_node, feature, offset);
// 0 indicates the lack of ID mapping for a particular node
node_id_mapping_.push_back(static_cast<index_type>(tl_node_id.value_or(0)));
++cur_tree_size_;
}

Expand Down Expand Up @@ -192,6 +200,10 @@ struct decision_forest_builder {
mem_type,
device,
stream},
raft_proto::buffer{raft_proto::buffer{node_id_mapping_.data(), node_id_mapping_.size()},
mem_type,
device,
stream},
num_feature,
num_class,
max_num_categories_ != 0,
Expand Down Expand Up @@ -234,6 +246,7 @@ struct decision_forest_builder {
std::vector<index_type> root_node_indexes_;
std::vector<typename node_type::threshold_type> vector_output_;
std::vector<typename node_type::index_type> categorical_storage_;
std::vector<index_type> node_id_mapping_;
};

} // namespace detail
Expand Down
117 changes: 106 additions & 11 deletions cpp/include/cuml/experimental/fil/detail/evaluate_tree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
#pragma once
#include <stdint.h>
#include <type_traits>
#ifndef __CUDACC__
#include <math.h>
#endif
Expand All @@ -26,19 +27,33 @@ namespace fil {
namespace detail {

/*
* Evaluate a single tree on a single row
* Evaluate a single tree on a single row.
* If a node_id_mapping array is supplied (i.e. node_id_mapping_t is not
* std::nullptr_t), this function returns the mapped ID of the leaf node
* instead of the leaf value.
*
* @tparam has_vector_leaves Whether or not this tree has vector leaves
* @tparam has_categorical nodes Whether or not this tree has any nodes with
* @tparam has_categorical_nodes Whether or not this tree has any nodes with
* categorical splits
* @tparam node_t The type of nodes in this tree
* @tparam io_t The type used for input to and output from this tree (typically
* either floats or doubles)
* @tparam node_id_mapping_t If non-nullptr_t, this indicates the type we expect for
* node_id_mapping.
* @param node Pointer to the root node of this tree
* @param row Pointer to the input data for this row
* @param first_root_node Pointer to the root node of the first tree.
* @param node_id_mapping Array representing the mapping from internal node IDs to
* final leaf ID outputs
*/
template <bool has_vector_leaves, bool has_categorical_nodes, typename node_t, typename io_t>
HOST DEVICE auto evaluate_tree(node_t const* __restrict__ node, io_t const* __restrict__ row)
template <bool has_vector_leaves,
bool has_categorical_nodes,
typename node_t,
typename io_t,
typename node_id_mapping_t = std::nullptr_t>
HOST DEVICE auto evaluate_tree_impl(node_t const* __restrict__ node,
io_t const* __restrict__ row,
node_t const* __restrict__ first_root_node = nullptr,
node_id_mapping_t node_id_mapping = nullptr)
{
using categorical_set_type = bitset<uint32_t, typename node_t::index_type const>;
auto cur_node = *node;
Expand All @@ -60,12 +75,18 @@ HOST DEVICE auto evaluate_tree(node_t const* __restrict__ node, io_t const* __re
node += cur_node.child_offset(condition);
cur_node = *node;
} while (!cur_node.is_leaf());
return cur_node.template output<has_vector_leaves>();
if constexpr (std::is_same_v<node_id_mapping_t, std::nullptr_t>) {
return cur_node.template output<has_vector_leaves>();
} else {
return node_id_mapping[node - first_root_node];
}
}

/*
* Evaluate a single tree which requires external categorical storage on a
* single node
* single row.
* If a node_id_mapping array is supplied (i.e. node_id_mapping_t is not
* std::nullptr_t), this function returns the mapped ID of the leaf node
* instead of the leaf value.
*
* For non-categorical models and models with a relatively small number of
* categories for any feature, all information necessary for model evaluation
Expand All @@ -81,15 +102,23 @@ HOST DEVICE auto evaluate_tree(node_t const* __restrict__ node, io_t const* __re
* either floats or doubles)
* @tparam categorical_storage_t The underlying type used for storing
* categorical data (typically char)
* @tparam node_id_mapping_t If non-nullptr_t, this indicates the type we expect for
* node_id_mapping.
* @param node Pointer to the root node of this tree
* @param row Pointer to the input data for this row
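* @param categorical_storage Pointer to where categorical split data is
* stored.
* @param first_root_node Pointer to the root node of the first tree.
* @param node_id_mapping Array representing the mapping from internal node IDs to
* final leaf ID outputs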
*/
template <bool has_vector_leaves, typename node_t, typename io_t, typename categorical_storage_t>
HOST DEVICE auto evaluate_tree(node_t const* __restrict__ node,
io_t const* __restrict__ row,
categorical_storage_t const* __restrict__ categorical_storage)
template <bool has_vector_leaves,
typename node_t,
typename io_t,
typename categorical_storage_t,
typename node_id_mapping_t = std::nullptr_t>
HOST DEVICE auto evaluate_tree_impl(node_t const* __restrict__ node,
io_t const* __restrict__ row,
categorical_storage_t const* __restrict__ categorical_storage,
node_t const* __restrict__ first_root_node = nullptr,
node_id_mapping_t node_id_mapping = nullptr)
{
using categorical_set_type = bitset<uint32_t, categorical_storage_t const>;
auto cur_node = *node;
Expand All @@ -109,7 +138,73 @@ HOST DEVICE auto evaluate_tree(node_t const* __restrict__ node,
node += cur_node.child_offset(condition);
cur_node = *node;
} while (!cur_node.is_leaf());
return cur_node.template output<has_vector_leaves>();
if constexpr (std::is_same_v<node_id_mapping_t, std::nullptr_t>) {
return cur_node.template output<has_vector_leaves>();
} else {
return node_id_mapping[node - first_root_node];
}
}

/**
 * Select and invoke the evaluate_tree_impl overload that matches the given
 * compile-time configuration.
 *
 * When predict_leaf is true, the root of tree 0 and the forest's node ID
 * mapping are forwarded to the implementation and the result is returned as
 * an index_type. Otherwise the tree's output is returned, typed as
 * node_type::index_type for vector-leaf models and as
 * node_type::threshold_type for all other models.
 *
 * @tparam has_vector_leaves Whether or not this tree has vector leaves
 * @tparam has_categorical_nodes Whether or not this tree has any nodes with
 * categorical splits
 * @tparam has_nonlocal_categories Whether or not this tree has any nodes that
 * store categorical split data externally
 * @tparam predict_leaf Whether to predict leaf IDs instead of leaf outputs
 * @tparam forest_t The type of forest
 * @tparam io_t The type used for input to and output from this tree
 * (typically either floats or doubles)
 * @tparam categorical_data_t The type for non-local categorical data storage
 * @param forest The forest used to perform inference
 * @param tree_index The index of the tree we are evaluating
 * @param row The data row we are evaluating
 * @param categorical_data The pointer to where non-local data on categorical
 * splits are stored; only forwarded when has_nonlocal_categories is true
 */
template <bool has_vector_leaves,
          bool has_categorical_nodes,
          bool has_nonlocal_categories,
          bool predict_leaf,
          typename forest_t,
          typename io_t,
          typename categorical_data_t>
HOST DEVICE auto evaluate_tree(forest_t const& forest,
                               index_type tree_index,
                               io_t const* __restrict__ row,
                               categorical_data_t categorical_data)
{
  // Root of the tree being evaluated; shared by every dispatch target below.
  auto* tree_root = forest.get_tree_root(tree_index);

  if constexpr (!predict_leaf) {
    using node_t   = typename forest_t::node_type;
    using output_t = std::conditional_t<has_vector_leaves,
                                        typename node_t::index_type,
                                        typename node_t::threshold_type>;
    // The explicitly typed variable fixes the return type regardless of what
    // the implementation returns.
    auto result = output_t{};
    if constexpr (!has_nonlocal_categories) {
      result = evaluate_tree_impl<has_vector_leaves, has_categorical_nodes>(tree_root, row);
    } else {
      result = evaluate_tree_impl<has_vector_leaves>(tree_root, row, categorical_data);
    }
    return result;
  } else {
    // Leaf-ID prediction: the implementation indexes the mapping by the
    // leaf's offset from the first tree's root.
    auto mapped_leaf_id = index_type{};
    if constexpr (!has_nonlocal_categories) {
      mapped_leaf_id = evaluate_tree_impl<has_vector_leaves, has_categorical_nodes>(
        tree_root, row, forest.get_tree_root(0), forest.get_node_id_mapping());
    } else {
      mapped_leaf_id = evaluate_tree_impl<has_vector_leaves>(
        tree_root, row, categorical_data, forest.get_tree_root(0), forest.get_node_id_mapping());
    }
    return mapped_leaf_id;
  }
}

} // namespace detail
Expand Down
7 changes: 7 additions & 0 deletions cpp/include/cuml/experimental/fil/detail/forest.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,12 @@ struct forest {

HOST DEVICE forest(node_type* forest_nodes,
index_type* forest_root_indexes,
index_type* node_id_mapping,
index_type num_trees,
index_type num_outputs)
: nodes_{forest_nodes},
root_node_indexes_{forest_root_indexes},
node_id_mapping_{node_id_mapping},
num_trees_{num_trees},
num_outputs_{num_outputs}
{
Expand All @@ -56,6 +58,10 @@ struct forest {
return nodes_ + root_node_indexes_[tree_index];
}

/* Accessor for the array that maps internal node IDs to the node IDs that are
 * reported as final output. Only used when infer_type == infer_kind::leaf_id */
HOST DEVICE auto const* get_node_id_mapping() const
{
  return node_id_mapping_;
}

/* Return the number of trees in this forest, i.e. the num_trees value that
 * was supplied to the constructor */
HOST DEVICE auto tree_count() const { return num_trees_; }

Expand All @@ -66,6 +72,7 @@ struct forest {
private:
node_type* nodes_;
index_type* root_node_indexes_;
index_type* node_id_mapping_;
index_type num_trees_;
index_type num_outputs_;
};
Expand Down
3 changes: 2 additions & 1 deletion cpp/include/cuml/experimental/fil/detail/infer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ namespace detail {
* required
* @param infer_type Type of inference to perform. Defaults to summing the outputs of all trees
* and produce an output per row. If set to "per_tree", we will instead output all outputs of
* individual trees.
* individual trees. If set to "leaf_id", we will output the integer ID of the leaf node
* for each tree.
* @param specified_chunk_size If non-nullopt, the size of "mini-batches"
* used for distributing work across threads
* @param device The device on which to execute evaluation
Expand Down
Loading