diff --git a/CMakeLists.txt b/CMakeLists.txt index 2838aa8a..b62db6a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1022,7 +1022,10 @@ enable_testing() # Recurse into the "Apex" and "Demo" subdirectories. This does not actually # cause another cmake executable to run. The same process will walk through # the project's entire directory structure. -add_subdirectory (src/perfetto_sdk) +if(APEX_WITH_PERFETTO) + add_definitions("-DAPEX_WITH_PERFETTO") + add_subdirectory (src/perfetto_sdk) +endif() add_subdirectory (src/apex) #add_subdirectory (src/ITTNotify) add_subdirectory (src/comm) diff --git a/cmake/Modules/APEX_DefaultOptions.cmake b/cmake/Modules/APEX_DefaultOptions.cmake index 2a014c45..61a655c8 100644 --- a/cmake/Modules/APEX_DefaultOptions.cmake +++ b/cmake/Modules/APEX_DefaultOptions.cmake @@ -16,6 +16,7 @@ option (APEX_WITH_STARPU "Enable APEX StarPU support" FALSE) option (APEX_WITH_TCMALLOC "Enable TCMalloc heap management" FALSE) option (APEX_WITH_JEMALLOC "Enable JEMalloc heap management" FALSE) option (APEX_WITH_LM_SENSORS "Enable LM Sensors support" FALSE) +option (APEX_WITH_PERFETTO "Enable native Perfetto trace support" TRUE) option (APEX_BUILD_TESTS "Build APEX tests (for 'make test')" FALSE) option (APEX_CUDA_TESTS "Build APEX CUDA tests (for 'make test')" FALSE) option (APEX_HIP_TESTS "Build APEX HIP tests (for 'make test')" FALSE) diff --git a/cmake/Modules/FindHWLOC.cmake b/cmake/Modules/FindHWLOC.cmake index dcdbbc90..bf0e7a04 100644 --- a/cmake/Modules/FindHWLOC.cmake +++ b/cmake/Modules/FindHWLOC.cmake @@ -54,9 +54,23 @@ endif() if(NOT DEFINED $HWLOC_DIR) if(DEFINED $HWLOC_ROOT) set(HWLOC_DIR $HWLOC_ROOT) + else() + # Otherwise, search for an hwloc command-line tool (lstopo / hwloc-ls) + find_program(HWLOC_LSTOPO_EXECUTABLE + NAMES lstopo lstopo-no-graphics hwloc-ls + PATHS ENV{PATH} + PATH_SUFFIXES bin + ) + if(HWLOC_LSTOPO_EXECUTABLE) + get_filename_component(hwloc_dir "${HWLOC_LSTOPO_EXECUTABLE}" DIRECTORY) + set(HWLOC_BIN_DIR "${hwloc_dir}" CACHE PATH "" FORCE) + 
unset(hwloc_dir) + get_filename_component(HWLOC_DIR ${HWLOC_BIN_DIR} DIRECTORY ABSOLUTE) + endif() endif() endif() + include(CheckStructHasMember) include(CheckCSourceCompiles) if (NOT HWLOC_FOUND) diff --git a/src/apex/CMakeLists.hpx b/src/apex/CMakeLists.hpx index aae50402..2c9629c6 100644 --- a/src/apex/CMakeLists.hpx +++ b/src/apex/CMakeLists.hpx @@ -255,6 +255,13 @@ if(APEX_WITH_OTF2) set(otf2_sources otf2_listener.cpp otf2_listener_hpx.cpp otf2_listener_nompi.cpp) endif() +if(APEX_WITH_PERFETTO) + set(perfetto_headers perfetto_listener.hpp perfetto_static.hpp) + set(perfetto_sources + perfetto_listener.cpp perfetto_static.cpp ../perfetto_sdk/perfetto.cc) + target_compile_definitions(apex_flags INTERFACE APEX_WITH_PERFETTO) +endif() + if(HPX_WITH_HPXMP) set(ompt_sources apex_ompt.cpp) include(APEX_SetupHPXMP) @@ -317,6 +324,7 @@ set(apex_headers task_wrapper.hpp tau_listener.hpp utils.hpp + ${perfetto_headers} ${proc_headers} ${otf2_headers} ) @@ -338,9 +346,6 @@ set(apex_sources gzstream.cpp handler.cpp memory_wrapper.cpp - ../perfetto_sdk/perfetto.cc - perfetto_listener.cpp - perfetto_static.cpp policy_handler.cpp profile_reducer.cpp profiler_listener.cpp @@ -352,6 +357,7 @@ set(apex_sources thread_instance.cpp trace_event_listener.cpp utils.cpp + ${perfetto_sources} ${proc_sources} ${bfd_sources} ${sensor_sources} @@ -364,7 +370,7 @@ set(apex_sources include(GNUInstallDirs) -if (WIN32) +if (WIN32 AND APEX_WITH_PERFETTO) # Enable standards-compliant mode when using the Visual Studio compiler. 
if (MSVC) SET_SOURCE_FILES_PROPERTIES( perfetto.cc PROPERTIES COMPILE_FLAGS diff --git a/src/apex/CMakeLists.standalone b/src/apex/CMakeLists.standalone index 24757bb5..31532b42 100644 --- a/src/apex/CMakeLists.standalone +++ b/src/apex/CMakeLists.standalone @@ -79,6 +79,15 @@ if (APEX_WITH_RAJA AND RAJA_FOUND) set(RAJA_SOURCE apex_raja.cpp) endif (APEX_WITH_RAJA AND RAJA_FOUND) +if(APEX_WITH_PERFETTO) + set(perfetto_headers perfetto_listener.hpp) + set(perfetto_sources + perfetto_listener.cpp perfetto_static.cpp) + set(perfetto_target perfetto) + add_definitions(-DAPEX_WITH_PERFETTO) + include_directories(${APEX_SOURCE_DIR}/src/perfetto_sdk) +endif() + # Try to keep this in alphabetical order SET(all_SOURCE apex_preload.cpp @@ -108,8 +117,7 @@ exhaustive.cpp handler.cpp memory_wrapper.cpp ${OTF2_SOURCE} -perfetto_listener.cpp -perfetto_static.cpp +${perfetto_sources} perftool_implementation.cpp policy_handler.cpp ${PROC_SOURCE} @@ -129,8 +137,7 @@ ${ZLIB_SOURCE} add_library (apex ${all_SOURCE}) add_library (taudummy tau_dummy.cpp) -add_dependencies (apex taudummy perfetto) -include_directories(${APEX_SOURCE_DIR}/src/perfetto_sdk) +add_dependencies (apex taudummy ${perfetto_target}) if(ACTIVEHARMONY_FOUND) add_dependencies (apex project_activeharmony) @@ -168,10 +175,10 @@ endif(APEX_WITH_CUDA) # so that we can LD_PRELOAD this library with all requirements met. 
if (NOT BUILD_STATIC_EXECUTABLES) if(APPLE) - target_link_libraries(apex perfetto ${LIBS}) + target_link_libraries(apex ${perfetto_target} ${LIBS}) set_target_properties(apex PROPERTIES LINK_FLAGS "${CMAKE_CURRENT_BINARY_DIR}/libtaudummy.dylib -flat_namespace") else(APPLE) - target_link_libraries(apex ${LIBS} perfetto taudummy) + target_link_libraries(apex ${LIBS} ${perfetto_target} taudummy) endif(APPLE) endif() diff --git a/src/apex/apex.cpp b/src/apex/apex.cpp index be9c0757..fff1f0bb 100644 --- a/src/apex/apex.cpp +++ b/src/apex/apex.cpp @@ -43,7 +43,9 @@ #include "tau_listener.hpp" #include "profiler_listener.hpp" #include "trace_event_listener.hpp" +#if defined(APEX_WITH_PERFETTO) #include "perfetto_listener.hpp" +#endif #if defined(APEX_DEBUG) || defined(APEX_ERROR_HANDLING) // #define APEX_DEBUG_disabled #include "apex_error_handling.hpp" @@ -311,10 +313,13 @@ void apex::_initialize() listeners.push_back(the_otf2_listener); } #endif +#if defined(APEX_WITH_PERFETTO) if (apex_options::use_perfetto()) { the_perfetto_listener = new perfetto_listener(); listeners.push_back(the_perfetto_listener); - } else if (apex_options::use_trace_event()) { + } +#endif + if (apex_options::use_trace_event()) { the_trace_event_listener = new trace_event_listener(); listeners.push_back(the_trace_event_listener); } diff --git a/src/apex/apex_ompt.cpp b/src/apex/apex_ompt.cpp index 6511a724..8b2f6d2d 100644 --- a/src/apex/apex_ompt.cpp +++ b/src/apex/apex_ompt.cpp @@ -23,7 +23,9 @@ #include "event_listener.hpp" #include "async_thread_node.hpp" #include "apex.hpp" +#if defined(APEX_WITH_PERFETTO) #include "perfetto_listener.hpp" +#endif #include "trace_event_listener.hpp" #ifdef APEX_HAVE_OTF2 #include "otf2_listener.hpp" @@ -175,13 +177,16 @@ void stop_async_task(std::shared_ptr tt, uint64_t start, uin if (correlationId > 0) { as_data = Globals::find_data(correlationId); } +#if defined(APEX_WITH_PERFETTO) if (apex::apex_options::use_perfetto()) { apex::perfetto_listener * tel = 
(apex::perfetto_listener*)instance->the_perfetto_listener; as_data.cat = "ControlFlow"; as_data.reverse_flow = false; tel->on_async_event(node, prof, as_data); - } else if (apex::apex_options::use_trace_event()) { + } +#endif + if (apex::apex_options::use_trace_event()) { apex::trace_event_listener * tel = (apex::trace_event_listener*)instance->the_trace_event_listener; as_data.cat = "ControlFlow"; @@ -219,11 +224,14 @@ void store_profiler_data(const std::string &name, // fake out the profiler_listener instance->the_profiler_listener->push_profiler_public(prof); // Handle tracing, if necessary +#if defined(APEX_WITH_PERFETTO) if (apex::apex_options::use_perfetto()) { apex::perfetto_listener * tel = (apex::perfetto_listener*)instance->the_perfetto_listener; tel->on_async_event(node, prof, as_data); - } else if (apex::apex_options::use_trace_event()) { + } +#endif + if (apex::apex_options::use_trace_event()) { apex::trace_event_listener * tel = (apex::trace_event_listener*)instance->the_trace_event_listener; tel->on_async_event(node, prof, as_data); @@ -262,11 +270,14 @@ void store_counter_data(const char * name, const std::string& ctx, // fake out the profiler_listener instance->the_profiler_listener->push_profiler_public(prof); // Handle tracing, if necessary +#if defined(APEX_WITH_PERFETTO) if (apex::apex_options::use_perfetto()) { apex::perfetto_listener * tel = (apex::perfetto_listener*)instance->the_perfetto_listener; tel->on_async_metric(node, prof); - } else if (apex::apex_options::use_trace_event()) { + } +#endif + if (apex::apex_options::use_trace_event()) { apex::trace_event_listener * tel = (apex::trace_event_listener*)instance->the_trace_event_listener; tel->on_async_metric(node, prof); diff --git a/src/apex/cupti_trace.cpp b/src/apex/cupti_trace.cpp index 389c1157..8bc82e9a 100644 --- a/src/apex/cupti_trace.cpp +++ b/src/apex/cupti_trace.cpp @@ -24,7 +24,9 @@ #include "profiler.hpp" #include "thread_instance.hpp" #include "apex_options.hpp" +#if 
defined(APEX_WITH_PERFETTO) #include "perfetto_listener.hpp" +#endif #include "trace_event_listener.hpp" #include "apex_nvml.hpp" #ifdef APEX_HAVE_OTF2 @@ -525,13 +527,16 @@ void store_profiler_data(const std::string &name, uint32_t correlationId, // fake out the profiler_listener instance->the_profiler_listener->push_profiler_public(prof); // Handle tracing, if necessary +#if defined(APEX_WITH_PERFETTO) if (apex::apex_options::use_perfetto()) { apex::perfetto_listener * tel = (apex::perfetto_listener*)instance->the_perfetto_listener; as_data.cat = category; as_data.reverse_flow = reverseFlow; tel->on_async_event(node, prof, as_data); - } else if (apex::apex_options::use_trace_event()) { + } +#endif + if (apex::apex_options::use_trace_event()) { apex::trace_event_listener * tel = (apex::trace_event_listener*)instance->the_trace_event_listener; as_data.cat = category; @@ -590,11 +595,14 @@ void store_counter_data(const char * name, const std::string& ctx, // fake out the profiler_listener instance->the_profiler_listener->push_profiler_public(prof); // Handle tracing, if necessary +#if defined(APEX_WITH_PERFETTO) if (apex::apex_options::use_perfetto()) { apex::perfetto_listener * tel = (apex::perfetto_listener*)instance->the_perfetto_listener; tel->on_async_metric(node, prof); - } else if (apex::apex_options::use_trace_event()) { + } +#endif + if (apex::apex_options::use_trace_event()) { apex::trace_event_listener * tel = (apex::trace_event_listener*)instance->the_trace_event_listener; tel->on_async_metric(node, prof); diff --git a/src/apex/hip_trace.cpp b/src/apex/hip_trace.cpp index 93ab6e8e..2e47e0d3 100644 --- a/src/apex/hip_trace.cpp +++ b/src/apex/hip_trace.cpp @@ -40,7 +40,9 @@ using namespace std; #include "address_resolution.hpp" #endif #include "async_thread_node.hpp" +#if defined(APEX_WITH_PERFETTO) #include "perfetto_listener.hpp" +#endif #include "trace_event_listener.hpp" #ifdef APEX_HAVE_OTF2 #include "otf2_listener.hpp" @@ -888,13 +890,16 @@ void 
store_profiler_data(const std::string &name, uint32_t correlationId, // fake out the profiler_listener instance->the_profiler_listener->push_profiler_public(prof); // Handle tracing, if necessary +#if defined(APEX_WITH_PERFETTO) if (apex::apex_options::use_perfetto()) { apex::perfetto_listener * tel = (apex::perfetto_listener*)instance->the_perfetto_listener; as_data.cat = category; as_data.reverse_flow = reverse_flow; tel->on_async_event(node, prof, as_data); - } else if (apex::apex_options::use_trace_event()) { + } +#endif + if (apex::apex_options::use_trace_event()) { apex::trace_event_listener * tel = (apex::trace_event_listener*)instance->the_trace_event_listener; as_data.cat = category; @@ -935,11 +940,14 @@ void store_counter_data(const char * name, const std::string& ctx, // fake out the profiler_listener instance->the_profiler_listener->push_profiler_public(prof); // Handle tracing, if necessary +#if defined(APEX_WITH_PERFETTO) if (apex::apex_options::use_perfetto()) { apex::perfetto_listener * tel = (apex::perfetto_listener*)instance->the_perfetto_listener; tel->on_async_metric(node, prof); - } else if (apex::apex_options::use_trace_event()) { + } +#endif + if (apex::apex_options::use_trace_event()) { apex::trace_event_listener * tel = (apex::trace_event_listener*)instance->the_trace_event_listener; tel->on_async_metric(node, prof); diff --git a/src/perfetto_sdk/perfetto.cc b/src/perfetto_sdk/perfetto.cc index bc924ccc..525e7cd2 100644 --- a/src/perfetto_sdk/perfetto.cc +++ b/src/perfetto_sdk/perfetto.cc @@ -1,5 +1,5 @@ #ifdef __NVCOMPILER -#warning "nvc++ takes a long time to compile this file..." +#warning "nvc++ takes a long time to compile this file...If you don't plan to use native perfetto trace output support, disable it with the CMake option -DAPEX_WITH_PERFETTO=FALSE" #endif // Copyright (C) 2019 The Android Open Source Project //