Skip to content

Commit

Permalink
OMPT Target Offload Support
Browse files Browse the repository at this point in the history
* Porting from ROCm/omnitrace#411
* Improve OMPT support
* Add OpenMP target example to testing
* Update Timemory submodule to use ROCm/Timemory rather than
  NERSC/Timemory

Signed-off-by: David Galiffi <[email protected]>
  • Loading branch information
dgaliffiAMD committed Nov 6, 2024
1 parent 21d9ab7 commit 99db079
Show file tree
Hide file tree
Showing 13 changed files with 757 additions and 81 deletions.
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[submodule "external/timemory"]
path = external/timemory
url = https://github.com/NERSC/timemory.git
url = https://github.com/ROCm/timemory.git
[submodule "external/perfetto"]
path = external/perfetto
url = https://github.com/google/perfetto.git
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ export LD_LIBRARY_PATH=/opt/rocprofiler-systems/lib:${LD_LIBRARY_PATH}

Generate a rocprofiler-systems configuration file using `rocprof-sys-avail -G rocprof-sys.cfg`. Optionally, use `rocprof-sys-avail -G rocprof-sys.cfg --all` for
a verbose configuration file with descriptions, categories, etc. Modify the configuration file as desired, e.g. enable
[perfetto](https://perfetto.dev/), [timemory](https://github.com/NERSC/timemory), sampling, and process-level sampling by default
[perfetto](https://perfetto.dev/), [timemory](https://github.com/ROCm/timemory), sampling, and process-level sampling by default
and tweak some sampling default values:

```console
Expand Down
2 changes: 1 addition & 1 deletion cmake/Packages.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,7 @@ mark_as_advanced(TIMEMORY_PROJECT_NAME)
rocprofiler_systems_checkout_git_submodule(
RELATIVE_PATH external/timemory
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
REPO_URL https://github.com/NERSC/timemory.git
REPO_URL https://github.com/ROCm/timemory.git
REPO_BRANCH omnitrace)

rocprofiler_systems_save_variables(
Expand Down
2 changes: 1 addition & 1 deletion docs/conceptual/rocprof-sys-feature-set.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ The ROCm Systems Profiler feature set and use cases
***************************************

`ROCm Systems Profiler <https://github.com/ROCm/rocprofiler-systems>`_ is designed to be highly extensible.
Internally, it leverages the `Timemory performance analysis toolkit <https://github.com/NERSC/timemory>`_
Internally, it leverages the `Timemory performance analysis toolkit <https://github.com/ROCm/timemory>`_
to manage extensions, resources, data, and other items. It supports the following features,
modes, metrics, and APIs.

Expand Down
4 changes: 2 additions & 2 deletions docs/how-to/configuring-runtime-options.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use the ``rocprof-sys-avail -G ~/.rocprof-sys.cfg --all`` option
for a verbose configuration file with descriptions, categories, and additional information.

Modify ``${HOME}/.rocprof-sys.cfg`` as required. For example, enable `Perfetto <https://perfetto.dev/>`_,
`Timemory <https://github.com/NERSC/timemory>`_, sampling, and process-level sampling by default
`Timemory <https://github.com/ROCm/timemory>`_, sampling, and process-level sampling by default
and tweak the default sampling values.

.. code-block:: shell
Expand Down Expand Up @@ -64,7 +64,7 @@ accepts a case insensitive match for nearly all common Boolean logic expressions
Exploring components
-----------------------------------

ROCm Systems Profiler uses `Timemory <https://github.com/NERSC/timemory>`_ extensively to provide
ROCm Systems Profiler uses `Timemory <https://github.com/ROCm/timemory>`_ extensively to provide
various capabilities and manage
data and resources. By default, with ``ROCPROFSYS_PROFILE=ON``, ROCm Systems Profiler only collects wall-clock
timing values. However, by modifying the ``ROCPROFSYS_TIMEMORY_COMPONENTS`` setting,
Expand Down
2 changes: 2 additions & 0 deletions examples/openmp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,5 @@ if(ROCPROFSYS_INSTALL_EXAMPLES)
DESTINATION bin
COMPONENT rocprofiler-systems-examples)
endif()

add_subdirectory(target)
108 changes: 108 additions & 0 deletions examples/openmp/target/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#
#
#
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)

# try to find a compatible HIP version
if(ROCmVersion_MAJOR_VERSION AND ROCmVersion_MAJOR_VERSION GREATER_EQUAL 6)
find_package(hip ${ROCmVersion_MAJOR_VERSION}.0.0)
else()
find_package(hip)
endif()

if(NOT hip_FOUND)
message(WARNING "ROCm >= 5.6 not found. Skipping OpenMP target example.")
return()
elseif(hip_FOUND AND hip_VERSION VERSION_LESS 5.6.0)
message(
WARNING
"ROCm >= 5.6 not found (found ${hip_VERSION}). Skipping OpenMP target example."
)
return()
endif()

if(NOT OMP_TARGET_COMPILER)
find_program(
amdclangpp_EXECUTABLE
NAMES amdclang++
HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
PATH_SUFFIXES bin llvm/bin)
mark_as_advanced(amdclangpp_EXECUTABLE)

if(amdclangpp_EXECUTABLE)
set(OMP_TARGET_COMPILER
"${amdclangpp_EXECUTABLE}"
CACHE FILEPATH "OpenMP target compiler")
else()
message(WARNING "OpenMP target compiler not found. Skipping this example.")
return()
endif()
endif()

project(rocprofiler-systems-example-openmp-target-lib LANGUAGES CXX)

set(CMAKE_BUILD_TYPE "RelWithDebInfo")

set(DEFAULT_GPU_TARGETS
"gfx900"
"gfx906"
"gfx908"
"gfx90a"
"gfx940"
"gfx941"
"gfx942"
"gfx1030"
"gfx1010"
"gfx1100"
"gfx1101"
"gfx1102")

set(GPU_TARGETS
"${DEFAULT_GPU_TARGETS}"
CACHE STRING "GPU targets to compile for")

find_package(Threads REQUIRED)

add_library(openmp-target-lib SHARED)
target_sources(openmp-target-lib PRIVATE library.cpp)
target_link_libraries(openmp-target-lib PUBLIC Threads::Threads)
target_compile_options(openmp-target-lib PRIVATE -fopenmp -ggdb)
target_link_options(openmp-target-lib PUBLIC -fopenmp)

foreach(_TARGET ${GPU_TARGETS})
target_compile_options(openmp-target-lib PRIVATE --offload-arch=${_TARGET})
target_link_options(openmp-target-lib PUBLIC --offload-arch=${_TARGET})
endforeach()

message(STATUS "Using OpenMP target compiler: ${OMP_TARGET_COMPILER}")

get_filename_component(OMP_TARGET_COMPILER_DIR ${OMP_TARGET_COMPILER} PATH)
get_filename_component(OMP_TARGET_COMPILER_DIR ${OMP_TARGET_COMPILER_DIR} PATH)

message(STATUS "Using OpemMP target compiler directory: ${OMP_TARGET_COMPILER_DIR}")

if(NOT EXISTS ${OMP_TARGET_COMPILER_DIR}/llvm/lib)
message(FATAL_ERROR "${OMP_TARGET_COMPILER_DIR}/llvm/lib does not exist")
endif()
set_target_properties(
openmp-target-lib
PROPERTIES BUILD_RPATH
"${OMP_TARGET_COMPILER_DIR}/llvm/lib:${OMP_TARGET_COMPILER_DIR}/lib"
OUTPUT_NAME "openmp-target"
POSITION_INDEPENDENT_CODE ON)

rocprofiler_systems_custom_compilation(TARGET openmp-target-lib COMPILER ${OMP_TARGET_COMPILER})

add_executable(openmp-target)
target_sources(openmp-target PRIVATE main.cpp)
target_link_libraries(openmp-target PRIVATE openmp-target-lib)
target_compile_options(openmp-target PRIVATE -ggdb)

set_target_properties(
openmp-target
PROPERTIES BUILD_RPATH
"${OMP_TARGET_COMPILER_DIR}/llvm/lib:${OMP_TARGET_COMPILER_DIR}/lib"
POSITION_INDEPENDENT_CODE ON)

rocprofiler_systems_custom_compilation(TARGET openmp-target COMPILER ${OMP_TARGET_COMPILER})
149 changes: 149 additions & 0 deletions examples/openmp/target/library.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <math.h>
#include <sstream>
#include <stdio.h>
#include <string>
#include <thread>
#include <unistd.h>

constexpr float EPS_FLOAT = 1.0e-7f;
constexpr double EPS_DOUBLE = 1.0e-15;

#pragma omp declare target
template <typename T>
T
mul(T a, T b)
{
T c;
c = a * b;
return c;
}
#pragma omp end declare target

template <typename T>
void
vmul(T* a, T* b, T* c, int N)
{
#pragma omp target map(to : a [0:N], b [0:N]) map(from : c [0:N])
#pragma omp teams distribute parallel for
for(int i = 0; i < N; i++)
{
c[i] = mul(a[i], b[i]);
}
}

int
run_impl()
{
std::this_thread::sleep_for(std::chrono::milliseconds{ 50 });

constexpr int N = 100000;
constexpr int Nc = N / 100;
int a_i[N], b_i[N], c_i[N], validate_i[N];
float a_f[N], b_f[N], c_f[N], validate_f[N];
double a_d[N], b_d[N], c_d[N], validate_d[N];
int N_errors = 0;
bool flag = false;

#pragma omp parallel for schedule(dynamic, Nc)
for(int i = 0; i < N; ++i)
{
a_f[i] = a_i[i] = i + 1;
b_f[i] = b_i[i] = i + 2;
a_d[i] = a_i[i];
b_d[i] = b_i[i];
validate_i[i] = a_i[i] * b_i[i];
validate_f[i] = a_f[i] * b_f[i];
validate_d[i] = a_d[i] * b_d[i];
}

for(int i = 0; i < 2; ++i)
{
vmul(a_i, b_i, c_i, N);
vmul(a_f, b_f, c_f, N);
vmul(a_d, b_d, c_d, N);
}

for(int i = 0; i < N; i++)
{
if(c_i[i] != validate_i[i])
{
++N_errors;
// print 1st bad index
if(!flag)
{
printf("First fail: c_i[%d](%d) != validate_i[%d](%d)\n", i, c_i[i], i,
validate_i[i]);
flag = true;
}
}
}
flag = false;
for(int i = 0; i < N; i++)
{
if(fabs(c_f[i] - validate_f[i]) > EPS_FLOAT)
{
++N_errors;
// print 1st bad index
if(!flag)
{
printf("First fail: c_f[%d](%f) != validate_f[%d](%f)\n", i,
static_cast<double>(c_f[i]), i,
static_cast<double>(validate_f[i]));
flag = true;
}
}
}
flag = false;
for(int i = 0; i < N; i++)
{
if(fabs(c_d[i] - validate_d[i]) > EPS_DOUBLE)
{
++N_errors;
// print 1st bad index
if(!flag)
{
printf("First fail: c_d[%d](%f) != validate_d[%d](%f)\n", i, c_d[i], i,
validate_d[i]);
flag = true;
}
}
}

return N_errors;
}

int
run()
{
#pragma omp parallel
{
run_impl();
}

return 0;
}
52 changes: 52 additions & 0 deletions examples/openmp/target/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <math.h>
#include <sstream>
#include <stdio.h>
#include <string>
#include <unistd.h>

extern int
run();

int
main()
{
auto N_errors = run();
auto _ec = EXIT_SUCCESS;
if(N_errors == 0)
{
printf("Success\n");
}
else
{
printf("Total %d failures\n", N_errors);
printf("Fail\n");
_ec = EXIT_FAILURE;
}

return _ec;
}
Loading

0 comments on commit 99db079

Please sign in to comment.