From 823c68fb173a12db5464bf82ab20e6756135e9d5 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Tue, 8 Jun 2021 12:48:34 -0500 Subject: [PATCH] Add info about NVTX ranges to dev guide. --- cpp/docs/DEVELOPER_GUIDE.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md index f2873e31c5b..8ec111acdb2 100644 --- a/cpp/docs/DEVELOPER_GUIDE.md +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -342,6 +342,7 @@ namespace detail{ } // namespace detail void external_function(...){ + CUDF_FUNC_RANGE(); // Auto-generates an NVTX range for the lifetime of this function detail::external_function(...); } ``` @@ -355,6 +356,12 @@ asynchrony if and when we add an asynchronous API to libcudf. **Note:** `cudaDeviceSynchronize()` should *never* be used. This limits the ability to do any multi-stream/multi-threaded work with libcudf APIs. + ### NVTX Ranges + + In order to aid in performance optimization and debugging, all compute-intensive libcudf functions should have a corresponding NVTX range. + In libcudf, we have a convenience macro `CUDF_FUNC_RANGE()` that will automatically annotate the lifetime of the enclosing function and use the function's name as the name of the NVTX range. + For more information about NVTX, see [here](https://github.com/NVIDIA/NVTX/tree/dev/cpp). + ### Stream Creation There may be times in implementing libcudf features where it would be advantageous to use streams