From 1e5f7f6651dcd78a0a5d64083c1503864c908ac3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 22 Oct 2024 22:55:08 -0400 Subject: [PATCH 01/20] Bump rocm-docs-core[api_reference] from 1.8.2 to 1.8.3 in /docs/sphinx (#12) Bumps [rocm-docs-core[api_reference]](https://github.com/ROCm/rocm-docs-core) from 1.8.2 to 1.8.3. - [Release notes](https://github.com/ROCm/rocm-docs-core/releases) - [Changelog](https://github.com/ROCm/rocm-docs-core/blob/develop/CHANGELOG.md) - [Commits](https://github.com/ROCm/rocm-docs-core/compare/v1.8.2...v1.8.3) --- updated-dependencies: - dependency-name: rocm-docs-core[api_reference] dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- docs/sphinx/requirements.in | 2 +- docs/sphinx/requirements.txt | 26 ++------------------------ 2 files changed, 3 insertions(+), 25 deletions(-) diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in index aa0042a5..ad94caa9 100644 --- a/docs/sphinx/requirements.in +++ b/docs/sphinx/requirements.in @@ -1 +1 @@ -rocm-docs-core[api_reference]==1.8.2 +rocm-docs-core[api_reference]==1.8.3 diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt index cd2e0a3d..1b5eb1e0 100644 --- a/docs/sphinx/requirements.txt +++ b/docs/sphinx/requirements.txt @@ -25,12 +25,7 @@ cffi==1.17.1 charset-normalizer==3.3.2 # via requests click==8.1.7 - # via - # click-log - # doxysphinx - # sphinx-external-toc -click-log==0.4.0 - # via doxysphinx + # via sphinx-external-toc cryptography==43.0.1 # via pyjwt deprecated==1.2.14 @@ -41,8 +36,6 @@ docutils==0.21.2 # myst-parser # pydata-sphinx-theme # sphinx -doxysphinx==3.3.10 - # via rocm-docs-core fastjsonschema==2.20.0 # via rocm-docs-core gitdb==4.0.11 @@ -57,10 +50,6 @@ jinja2==3.1.4 # via # myst-parser # sphinx -libsass==0.22.0 - # via 
doxysphinx -lxml==4.9.4 - # via doxysphinx markdown-it-py==3.0.0 # via # mdit-py-plugins @@ -71,12 +60,8 @@ mdit-py-plugins==0.4.2 # via myst-parser mdurl==0.1.2 # via markdown-it-py -mpire==2.10.2 - # via doxysphinx myst-parser==4.0.0 # via rocm-docs-core -numpy==1.26.4 - # via doxysphinx packaging==24.1 # via # pydata-sphinx-theme @@ -92,17 +77,12 @@ pygithub==2.4.0 pygments==2.18.0 # via # accessible-pygments - # mpire # pydata-sphinx-theme # sphinx -pyjson5==1.6.6 - # via doxysphinx pyjwt[crypto]==2.9.0 # via pygithub pynacl==1.5.0 # via pygithub -pyparsing==3.1.4 - # via doxysphinx pyyaml==6.0.2 # via # myst-parser @@ -112,7 +92,7 @@ requests==2.32.3 # via # pygithub # sphinx -rocm-docs-core[api-reference]==1.8.2 +rocm-docs-core[api-reference]==1.8.3 # via -r requirements.in smmap==5.0.1 # via gitdb @@ -155,8 +135,6 @@ sphinxcontrib-serializinghtml==2.0.0 # via sphinx tomli==2.0.2 # via sphinx -tqdm==4.66.5 - # via mpire typing-extensions==4.12.2 # via # pydata-sphinx-theme From 21d9ab79fd9f3368adb3d72f42755d05d935d6b3 Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Tue, 5 Nov 2024 10:14:25 -0500 Subject: [PATCH 02/20] Fix for proto files not being viewable in Perfetto UI (#16) - Fix for proto files not being viewable in Perfetto UI - Ported from https://github.com/ROCm/omnitrace/pull/411 - Update Workflows - Use V47 trace_processor_shell for certain OS releases. - RedHat 8, SUSE 15.5, and Ubuntu 20.04 are no longer compatible with the latest trace_processor_shell. - Incompatible version of GLIBC. - Remove notes about Perfetto workaround in documentation. 
--------- Signed-off-by: David Galiffi --- .github/workflows/opensuse.yml | 8 ++- .github/workflows/redhat.yml | 10 +-- .github/workflows/ubuntu-focal.yml | 22 +++--- .github/workflows/ubuntu-jammy.yml | 6 +- README.md | 4 -- .../understanding-rocprof-sys-output.rst | 5 -- docs/install/install.rst | 5 -- docs/what-is-rocprof-sys.rst | 5 -- source/lib/core/config.cpp | 70 +++++++++++++++---- source/lib/core/config.hpp | 7 ++ source/lib/core/perfetto.cpp | 38 +++++----- 11 files changed, 110 insertions(+), 70 deletions(-) diff --git a/.github/workflows/opensuse.yml b/.github/workflows/opensuse.yml index 36836733..42ab253e 100644 --- a/.github/workflows/opensuse.yml +++ b/.github/workflows/opensuse.yml @@ -58,11 +58,13 @@ jobs: timeout_minutes: 25 max_attempts: 5 command: | - wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v46.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin && - chmod +x /opt/trace_processor/bin/trace_processor_shell + if [ "${{ matrix.os-release }}" == "15.5" ]; then + wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v47.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin && + chmod +x /opt/trace_processor/bin/trace_processor_shell + fi python3 -m pip install --upgrade pip && python3 -m pip install numpy perfetto dataclasses && - for i in 6 7 8 9 10; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done + for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done - name: Configure Env run: diff --git a/.github/workflows/redhat.yml b/.github/workflows/redhat.yml index 81065b80..6f6b336c 100644 --- a/.github/workflows/redhat.yml +++ b/.github/workflows/redhat.yml @@ -56,23 +56,25 @@ jobs: run: echo "CC=$(echo '${{ matrix.compiler }}' | sed 's/+/c/g')" >> $GITHUB_ENV && echo "CXX=${{ matrix.compiler }}" >> $GITHUB_ENV && + echo "OS_VERSION_MAJOR=$(cat /etc/os-release | grep 'VERSION_ID' | sed 's/=/ 
/1' | awk '{print $NF}' | sed 's/"//g' | sed 's/\./ /g' | awk '{print $1}')" >> $GITHUB_ENV && env - name: Install Packages shell: bash run: | - wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v46.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin && - chmod +x /opt/trace_processor/bin/trace_processor_shell + if [ $OS_VERSION_MAJOR -eq 8 ]; then + wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v47.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin && + chmod +x /opt/trace_processor/bin/trace_processor_shell + fi python3 -m pip install --upgrade pip && python3 -m pip install numpy perfetto dataclasses && - for i in 6 7 8 9 10; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done + for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done - name: Install ROCm Packages if: ${{ matrix.rocm-version > 0 }} timeout-minutes: 30 shell: bash run: | - OS_VERSION_MAJOR=$(cat /etc/os-release | grep 'VERSION_ID' | sed 's/=/ /1' | awk '{print $NF}' | sed 's/"//g' | sed 's/\./ /g' | awk '{print $1}') RPM_TAG=".el${OS_VERSION_MAJOR}" ROCM_VERSION=${{ matrix.rocm-version }} ROCM_MAJOR=$(echo ${ROCM_VERSION} | sed 's/\./ /g' | awk '{print $1}') diff --git a/.github/workflows/ubuntu-focal.yml b/.github/workflows/ubuntu-focal.yml index 1b65e5e3..242576aa 100644 --- a/.github/workflows/ubuntu-focal.yml +++ b/.github/workflows/ubuntu-focal.yml @@ -93,12 +93,12 @@ jobs: add-apt-repository -y ppa:ubuntu-toolchain-r/test && apt-get update && apt-get upgrade -y && - apt-get install -y build-essential m4 autoconf libtool python3-pip libiberty-dev clang libmpich-dev mpich environment-modules ${{ matrix.compiler }} && - wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v46.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin && + apt-get install -y autoconf bison build-essential clang environment-modules 
gettext libiberty-dev libmpich-dev libtool m4 mpich python3-pip texinfo ${{ matrix.compiler }} && + wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v47.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin && chmod +x /opt/trace_processor/bin/trace_processor_shell && python3 -m pip install --upgrade pip && python3 -m pip install numpy perfetto dataclasses && - for i in 6 7 8 9 10; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done && + for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done && apt-get -y --purge autoremove && apt-get -y clean && /opt/conda/bin/conda clean -y -a @@ -274,12 +274,12 @@ jobs: wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - && echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/${{ matrix.rocm-version }}/ ubuntu main" | tee /etc/apt/sources.list.d/rocm.list && apt-get update && - apt-get install -y build-essential m4 autoconf libtool python3-pip clang libomp-dev ${{ matrix.compiler }} libudev1 libnuma1 rocm-dev rocm-utils rocm-smi-lib roctracer-dev rocprofiler-dev rccl-dev hip-base hsa-amd-aqlprofile hsa-rocr-dev hsakmt-roct-dev libpapi-dev curl libopenmpi-dev openmpi-bin libfabric-dev && - wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v46.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin && + apt-get install -y autoconf bison build-essential clang curl gettext libfabric-dev libnuma1 libomp-dev libopenmpi-dev libpapi-dev libtool libudev1 m4 openmpi-bin python3-pip rocm-dev texinfo && + wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v47.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin && chmod +x /opt/trace_processor/bin/trace_processor_shell && python3 -m pip install --upgrade pip && python3 -m pip install numpy perfetto dataclasses && - for i in 6 7 8 9 10; do /opt/conda/envs/py3.${i}/bin/python -m pip install 
numpy perfetto dataclasses; done && + for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done && apt-get -y --purge autoremove && apt-get -y clean && /opt/conda/bin/conda clean -y -a @@ -434,10 +434,10 @@ jobs: max_attempts: 5 command: | sudo apt-get update && - sudo apt-get install -y build-essential m4 autoconf libtool python3-pip clang libomp-dev environment-modules ${{ matrix.deps }} ${{ matrix.compiler }} && + sudo apt-get install -y autoconf bison build-essential clang environment-modules gettext libomp-dev libtool m4 python3-pip texinfo ${{ matrix.compiler }} ${{ matrix.deps }} && if [ "${{ matrix.mpi }}" = "mpich" ]; then sudo apt-get install -y libmpich-dev mpich; fi && if [ "${{ matrix.mpi }}" = "openmpi" ]; then sudo apt-get install -y libopenmpi-dev openmpi-bin libfabric-dev; fi && - wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v46.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin && + wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v47.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin && chmod +x /opt/trace_processor/bin/trace_processor_shell && python3 -m pip install --upgrade pip && python3 -m pip install numpy perfetto dataclasses && @@ -588,12 +588,12 @@ jobs: max_attempts: 5 command: | apt-get update && - apt-get install -y build-essential m4 autoconf libtool python3-pip clang libomp-dev environment-modules gcc g++ mpich libmpich-dev texinfo && - wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v46.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin && + apt-get install -y autoconf bison build-essential clang environment-modules gcc g++ libmpich-dev libomp-dev libtool m4 mpich python3-pip texinfo && + wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v47.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin && chmod +x 
/opt/trace_processor/bin/trace_processor_shell && python3 -m pip install --upgrade pip && python3 -m pip install numpy perfetto dataclasses && - for i in 6 7 8 9 10; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done && + for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done && apt-get -y --purge autoremove && apt-get -y clean && /opt/conda/bin/conda clean -y -a diff --git a/.github/workflows/ubuntu-jammy.yml b/.github/workflows/ubuntu-jammy.yml index 342d5544..78574f03 100644 --- a/.github/workflows/ubuntu-jammy.yml +++ b/.github/workflows/ubuntu-jammy.yml @@ -109,12 +109,10 @@ jobs: apt-get update && apt-get install -y software-properties-common && apt-get upgrade -y && - apt-get install -y build-essential m4 autoconf libtool python3-pip libiberty-dev clang libomp-dev libopenmpi-dev libfabric-dev openmpi-bin environment-modules ${{ matrix.compiler }} && - wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v46.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin && - chmod +x /opt/trace_processor/bin/trace_processor_shell && + apt-get install -y autoconf bison build-essential clang environment-modules gettext libfabric-dev libiberty-dev libomp-dev libopenmpi-dev libtool m4 openmpi-bin python3-pip texinfo ${{ matrix.compiler }} && python3 -m pip install --upgrade pip && python3 -m pip install numpy perfetto dataclasses && - for i in 6 7 8 9 10; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done + for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done - name: Install ROCm Packages timeout-minutes: 25 diff --git a/README.md b/README.md index 3aa7f274..963d5fc4 100755 --- a/README.md +++ b/README.md @@ -7,10 +7,6 @@ [![Installer Packaging 
(CPack)](https://github.com/ROCm/rocprofiler-systems/actions/workflows/cpack.yml/badge.svg)](https://github.com/ROCm/rocprofiler-systems/actions/workflows/cpack.yml) [![Documentation](https://github.com/ROCm/rocprofiler-systems/actions/workflows/docs.yml/badge.svg)](https://github.com/ROCm/rocprofiler-systems/actions/workflows/docs.yml) -> [!NOTE] -> Perfetto validation is done with trace_processor v46.0, as there is a known issue with v47.0. -If you are experiencing problems viewing your trace in the latest version of [Perfetto](http://ui.perfetto.dev), then try using [Perfetto UI v46.0](https://ui.perfetto.dev/v46.0-35b3d9845/#!/). - ## Overview ROCm Systems Profiler (rocprofiler-systems), formerly Omnitrace, is a comprehensive profiling and tracing tool for parallel applications written in C, C++, Fortran, HIP, OpenCL, and Python which execute on the CPU or CPU+GPU. diff --git a/docs/how-to/understanding-rocprof-sys-output.rst b/docs/how-to/understanding-rocprof-sys-output.rst index cdd75cd9..22549e24 100644 --- a/docs/how-to/understanding-rocprof-sys-output.rst +++ b/docs/how-to/understanding-rocprof-sys-output.rst @@ -323,11 +323,6 @@ absolute path, then all ``ROCPROFSYS_OUTPUT_PATH`` and similar settings are ignored. Visit `ui.perfetto.dev `_ and open this file. -.. important:: - Perfetto validation is done with trace_processor v46.0 as there is a known issue with v47.0. - If you are experiencing problems viewing your trace in the latest version of `Perfetto `_, - then try using `Perfetto UI v46.0 `_. - .. 
image:: ../data/rocprof-sys-perfetto.png :alt: Visualization of a performance graph in Perfetto diff --git a/docs/install/install.rst b/docs/install/install.rst index 5467a9d0..285f635b 100644 --- a/docs/install/install.rst +++ b/docs/install/install.rst @@ -253,11 +253,6 @@ into Perfetto support for ROCm Systems Profiler, for example, ``ROCPROFSYS_USE_P is passed along to Perfetto and is displayed when the ``.proto`` file is visualized in `the Perfetto UI `_. -.. important:: - Perfetto validation is done with trace_processor v46.0 as there is a known issue with v47.0. - If you are experiencing problems viewing your trace in the latest version of `Perfetto `_, - then try using `Perfetto UI v46.0 `_. - .. code-block:: shell git clone https://github.com/ROCm/rocprofiler-systems.git rocprof-sys-source diff --git a/docs/what-is-rocprof-sys.rst b/docs/what-is-rocprof-sys.rst index 952a0197..09ec88a6 100644 --- a/docs/what-is-rocprof-sys.rst +++ b/docs/what-is-rocprof-sys.rst @@ -15,11 +15,6 @@ A visualization of the comprehensive ROCm Systems Profiler results can be observ web browser. Upload the Perfetto (``.proto``) output files produced by ROCm Systems Profiler at `ui.perfetto.dev `_ to see the details. -.. important:: - Perfetto validation is done with trace_processor v46.0 as there is a known issue with v47.0. - If you are experiencing problems viewing your trace in the latest version of `Perfetto `_, - then try using `Perfetto UI v46.0 `_. - Aggregated high-level results are available as human-readable text files and JSON files for programmatic analysis. The JSON output files are compatible with the `hatchet `_ Python package. 
Hatchet converts diff --git a/source/lib/core/config.cpp b/source/lib/core/config.cpp index c519dfd7..38fbc4e5 100644 --- a/source/lib/core/config.cpp +++ b/source/lib/core/config.cpp @@ -2454,18 +2454,37 @@ tmp_file::~tmp_file() remove(); } -bool -tmp_file::open(std::ios::openmode _mode) +void +tmp_file::touch() const { - ROCPROFSYS_BASIC_VERBOSE(2, "Opening temporary file '%s'...\n", filename.c_str()); - if(!filepath::exists(filename)) { // if the filepath does not exist, open in out mode to create it - std::ofstream _ofs{}; + auto _ofs = std::ofstream{}; filepath::open(_ofs, filename); } +} + +bool +tmp_file::open(int _mode, int _perms) +{ + ROCPROFSYS_BASIC_VERBOSE(2, "Opening temporary file '%s'...\n", filename.c_str()); + + touch(); + m_pid = getpid(); + fd = ::open(filename.c_str(), _mode, _perms); + return (fd > 0); +} + +bool +tmp_file::open(std::ios::openmode _mode) +{ + ROCPROFSYS_BASIC_VERBOSE(2, "Opening temporary file '%s'...\n", filename.c_str()); + + touch(); + + m_pid = getpid(); stream.open(filename, _mode); return (stream.is_open() && stream.good()); @@ -2476,14 +2495,10 @@ tmp_file::fopen(const char* _mode) { ROCPROFSYS_BASIC_VERBOSE(2, "Opening temporary file '%s'...\n", filename.c_str()); - if(!filepath::exists(filename)) - { - // if the filepath does not exist, open in out mode to create it - std::ofstream _ofs{}; - filepath::open(_ofs, filename); - } + touch(); - file = filepath::fopen(filename, _mode); + m_pid = getpid(); + file = filepath::fopen(filename, _mode); if(file) fd = ::fileno(file); return (file != nullptr && fd > 0); @@ -2492,6 +2507,8 @@ tmp_file::fopen(const char* _mode) bool tmp_file::flush() { + if(m_pid != getpid()) return false; + if(stream.is_open()) { stream.flush(); @@ -2508,6 +2525,18 @@ tmp_file::flush() } return (_ret == 0); } + else if(fd > 0) + { + int _ret = ::fsync(fd); + int _cnt = 0; + while(_ret == EAGAIN || _ret == EINTR) + { + std::this_thread::sleep_for(std::chrono::milliseconds{ 100 }); + _ret = 
::fsync(fd); + if(++_cnt > 10) break; + } + return (_ret == 0); + } return true; } @@ -2517,6 +2546,8 @@ tmp_file::close() { flush(); + if(m_pid != getpid()) return false; + if(stream.is_open()) { stream.close(); @@ -2532,6 +2563,15 @@ tmp_file::close() } return (_ret == 0); } + else if(fd > 0) + { + auto _ret = ::close(fd); + if(_ret == 0) + { + fd = -1; + } + return (_ret == 0); + } return true; } @@ -2539,6 +2579,8 @@ tmp_file::close() bool tmp_file::remove() { + if(m_pid != getpid()) return false; + close(); if(filepath::exists(filename)) { @@ -2553,7 +2595,9 @@ tmp_file::remove() tmp_file::operator bool() const { - return (stream.is_open() && stream.good()) || (file != nullptr && fd > 0); + return (m_pid == getpid()) && + ((stream.is_open() && stream.good()) || (file != nullptr && fd > 0) || + (file == nullptr && fd > 0)); } std::shared_ptr diff --git a/source/lib/core/config.hpp b/source/lib/core/config.hpp index 2b9847bc..2ccbce23 100644 --- a/source/lib/core/config.hpp +++ b/source/lib/core/config.hpp @@ -374,6 +374,7 @@ struct tmp_file tmp_file(std::string); ~tmp_file(); + bool open(int, int); bool open(std::ios::openmode = std::ios::binary | std::ios::in | std::ios::out); bool fopen(const char* = "r+"); bool flush(); @@ -386,6 +387,12 @@ struct tmp_file std::fstream stream = {}; FILE* file = nullptr; int fd = -1; + +private: + void touch() const; + +private: + pid_t m_pid = getpid(); }; std::shared_ptr diff --git a/source/lib/core/perfetto.cpp b/source/lib/core/perfetto.cpp index ac2c2e50..427ca6e6 100644 --- a/source/lib/core/perfetto.cpp +++ b/source/lib/core/perfetto.cpp @@ -22,10 +22,13 @@ #include "perfetto.hpp" #include "config.hpp" +#include "debug.hpp" #include "library/runtime.hpp" #include "perfetto_fwd.hpp" #include "utility.hpp" +#include + namespace rocprofsys { namespace perfetto @@ -120,18 +123,18 @@ start() if(!_tmp_file) { _tmp_file = config::get_tmp_file("perfetto-trace", "proto"); - _tmp_file->fopen("w+"); - } - else - { - 
ROCPROFSYS_VERBOSE(2, "Resuming perfetto...\n"); - _tmp_file->fopen("a+"); + _tmp_file->open(O_RDWR | O_CREAT | O_TRUNC, 0600); } } ROCPROFSYS_VERBOSE(2, "Setup perfetto...\n"); int _fd = (_tmp_file) ? _tmp_file->fd : -1; auto& cfg = get_config(); + tracing_session->SetOnErrorCallback([](::perfetto::TracingError _err) { + if(_err.code == ::perfetto::TracingError::kTracingFailed) + ROCPROFSYS_WARNING(0, "perfetto encountered a tracing error: %s\n", + _err.message.c_str()); + }); tracing_session->Setup(cfg, _fd); tracing_session->StartBlocking(); } @@ -174,7 +177,7 @@ post_process(tim::manager* _timemory_manager, bool& _perfetto_output_error) if(_tmp_file && *_tmp_file) { _tmp_file->close(); - FILE* _fdata = fopen(_tmp_file->filename.c_str(), "rb"); + FILE* _fdata = ::fopen(_tmp_file->filename.c_str(), "rb"); if(!_fdata) { @@ -184,22 +187,25 @@ post_process(tim::manager* _timemory_manager, bool& _perfetto_output_error) return char_vec_t{ tracing_session->ReadTraceBlocking() }; } - fseek(_fdata, 0, SEEK_END); - size_t _fnum_elem = ftell(_fdata); - fseek(_fdata, 0, SEEK_SET); // same as rewind(f); + ::fseek(_fdata, 0, SEEK_END); + size_t _fnum_elem = ::ftell(_fdata); + ::fseek(_fdata, 0, SEEK_SET); // same as rewind(f); - _data.resize(_fnum_elem + 1); - auto _fnum_read = fread(_data.data(), sizeof(char), _fnum_elem, _fdata); - fclose(_fdata); + _data.resize(_fnum_elem, '\0'); + auto _fnum_read = ::fread(_data.data(), sizeof(char), _fnum_elem, _fdata); + ::fclose(_fdata); ROCPROFSYS_CI_THROW( _fnum_read != _fnum_elem, "Error! read %zu elements from perfetto trace file '%s'. 
Expected %zu\n", _fnum_read, _tmp_file->filename.c_str(), _fnum_elem); } + else + { + _data = char_vec_t{ tracing_session->ReadTraceBlocking() }; + } - return utility::combine(_data, - char_vec_t{ tracing_session->ReadTraceBlocking() }); + return _data; }; auto trace_data = char_vec_t{}; @@ -251,7 +257,7 @@ post_process(tim::manager* _timemory_manager, bool& _perfetto_output_error) else { // Write the trace into a file. - ofs.write(&trace_data[0], trace_data.size()); + ofs.write(trace_data.data(), trace_data.size()); if(config::get_verbose() >= 0) _fom.append("%s", "Done"); // NOLINT if(_timemory_manager) _timemory_manager->add_file_output("protobuf", "perfetto", _filename); From fb549fc83e70d371437be6b47733ea21297dad25 Mon Sep 17 00:00:00 2001 From: Peter Park Date: Thu, 7 Nov 2024 11:51:02 -0500 Subject: [PATCH 03/20] Add CHANGELOG.md (#18) --- CHANGELOG.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..9453dd51 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,18 @@ +# Changelog for ROCm Systems Profiler + +Full documentation for ROCm Systems Profiler is available at [https://rocm.docs.amd.com/projects/rocprofiler-systems/en/latest/](https://rocm.docs.amd.com/projects/rocprofiler-systems/en/latest/). + +## ROCm Systems Profiler 0.1.0 for ROCm 6.3.0 + +### Changed + +- Renamed Omnitrace to ROCm Systems Profiler. + +## Omnitrace 1.11.2 for ROCm 6.2.1 + +### Known issues + +- Perfetto can no longer open Omnitrace proto files. Loading the Perfetto trace output `.proto` file in `ui.perfetto.dev` can + result in a dialog with the message, "Oops, something went wrong! Please file a bug." The information in the dialog will + refer to an "Unknown field type." The workaround is to open the files with the previous version of the Perfetto UI found + at https://ui.perfetto.dev/v46.0-35b3d9845/#!/. 
From 64526650be05e1f579c90493466cdf77a8d370d3 Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Thu, 7 Nov 2024 14:29:37 -0500 Subject: [PATCH 04/20] Update CODEOWNERS --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 0cab2fb5..51cdbce6 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,4 +1,4 @@ -* @jrmadsen @dgaliffiAMD + * @dgaliffiAMD @ajanicijamd @wilephan-amd @jrmadsen # Documentation files docs/* @ROCm/rocm-documentation From 2cb8a82101ec1fbe2f1e2af09cbebf371472baac Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Thu, 7 Nov 2024 14:30:06 -0500 Subject: [PATCH 05/20] Update CODEOWNERS --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 51cdbce6..ea317444 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,4 +1,4 @@ - * @dgaliffiAMD @ajanicijamd @wilephan-amd @jrmadsen +* @dgaliffiAMD @ajanicijamd @wilephan-amd @jrmadsen # Documentation files docs/* @ROCm/rocm-documentation From 7dce5926a7310275cf63f298f0ebf0e80b8829ee Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Thu, 7 Nov 2024 16:49:32 -0500 Subject: [PATCH 06/20] OMPT Target Offload Support (#17) - Porting from https://github.com/ROCm/omnitrace/pull/411 - Improve OMPT support - Add OpenMP target example to testing - Update Timemory submodule to use ROCm/Timemory rather than NERSC/Timemory - Update `actions/upload-artifacts` to v4 - Standardize the `cmake_minimum_required` to 3.18.4 across workflows, project, and examples - Updated Ubuntu 20.04 workflows --- .github/workflows/opensuse.yml | 9 +- .github/workflows/redhat.yml | 5 +- .github/workflows/ubuntu-focal.yml | 30 +- .github/workflows/ubuntu-jammy.yml | 5 +- .gitmodules | 2 +- CMakeLists.txt | 2 +- README.md | 2 +- cmake/Packages.cmake | 2 +- docs/conceptual/rocprof-sys-feature-set.rst | 2 +- docs/how-to/configuring-runtime-options.rst | 4 +- 
examples/CMakeLists.txt | 2 +- examples/causal/CMakeLists.txt | 2 +- examples/code-coverage/CMakeLists.txt | 2 +- examples/fork/CMakeLists.txt | 2 +- examples/lulesh/CMakeLists.txt | 2 +- examples/mpi/CMakeLists.txt | 2 +- examples/openmp/CMakeLists.txt | 4 +- examples/openmp/target/CMakeLists.txt | 110 +++++ examples/openmp/target/library.cpp | 149 +++++++ examples/openmp/target/main.cpp | 52 +++ examples/parallel-overhead/CMakeLists.txt | 2 +- examples/python/CMakeLists.txt | 2 +- examples/rccl/CMakeLists.txt | 2 +- examples/rewrite-caller/CMakeLists.txt | 2 +- examples/trace-time-window/CMakeLists.txt | 2 +- examples/transpose/CMakeLists.txt | 2 +- examples/user-api/CMakeLists.txt | 2 +- external/timemory | 2 +- scripts/run-ci.sh | 2 +- scripts/test-find-package.sh | 2 +- scripts/write-rocprof-sys-install.cmake | 2 +- source/lib/rocprof-sys/library/ompt.cpp | 429 +++++++++++++++++-- source/lib/rocprof-sys/library/roctracer.cpp | 49 +-- source/lib/rocprof-sys/library/tracing.hpp | 36 ++ 34 files changed, 801 insertions(+), 125 deletions(-) create mode 100644 examples/openmp/target/CMakeLists.txt create mode 100644 examples/openmp/target/library.cpp create mode 100644 examples/openmp/target/main.cpp diff --git a/.github/workflows/opensuse.yml b/.github/workflows/opensuse.yml index 42ab253e..f6de06e6 100644 --- a/.github/workflows/opensuse.yml +++ b/.github/workflows/opensuse.yml @@ -63,8 +63,9 @@ jobs: chmod +x /opt/trace_processor/bin/trace_processor_shell fi python3 -m pip install --upgrade pip && - python3 -m pip install numpy perfetto dataclasses && - for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done + python3 -m pip install --upgrade numpy perfetto dataclasses && + python3 -m pip install 'cmake==3.18.4' && + for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install --upgrade numpy perfetto dataclasses; done - name: Configure Env run: @@ -136,7 +137,7 @@ jobs: - name: CTest 
Artifacts if: failure() continue-on-error: True - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: ctest-${{ github.job }}-${{ strategy.job-index }}-log path: | @@ -145,7 +146,7 @@ jobs: - name: Data Artifacts if: failure() continue-on-error: True - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: data-${{ github.job }}-${{ strategy.job-index }}-files path: | diff --git a/.github/workflows/redhat.yml b/.github/workflows/redhat.yml index 6f6b336c..d48ab579 100644 --- a/.github/workflows/redhat.yml +++ b/.github/workflows/redhat.yml @@ -67,8 +67,9 @@ jobs: chmod +x /opt/trace_processor/bin/trace_processor_shell fi python3 -m pip install --upgrade pip && - python3 -m pip install numpy perfetto dataclasses && - for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done + python3 -m pip install --upgrade numpy perfetto dataclasses && + python3 -m pip install 'cmake==3.18.4' && + for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install --upgrade numpy perfetto dataclasses; done - name: Install ROCm Packages if: ${{ matrix.rocm-version > 0 }} diff --git a/.github/workflows/ubuntu-focal.yml b/.github/workflows/ubuntu-focal.yml index 242576aa..93ed6182 100644 --- a/.github/workflows/ubuntu-focal.yml +++ b/.github/workflows/ubuntu-focal.yml @@ -46,7 +46,7 @@ jobs: compiler: ['g++-7', 'g++-8'] lto: ['OFF'] strip: ['OFF'] - python: ['ON'] + python: ['OFF'] build-type: ['Release'] mpi-headers: ['OFF'] static-libgcc: ['OFF'] @@ -97,8 +97,9 @@ jobs: wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v47.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin && chmod +x /opt/trace_processor/bin/trace_processor_shell && python3 -m pip install --upgrade pip && - python3 -m pip install numpy perfetto dataclasses && - for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; 
done && + python3 -m pip install --upgrade numpy perfetto dataclasses && + python3 -m pip install 'cmake==3.18.4' && + for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install --upgrade numpy perfetto dataclasses; done && apt-get -y --purge autoremove && apt-get -y clean && /opt/conda/bin/conda clean -y -a @@ -278,8 +279,9 @@ jobs: wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v47.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin && chmod +x /opt/trace_processor/bin/trace_processor_shell && python3 -m pip install --upgrade pip && - python3 -m pip install numpy perfetto dataclasses && - for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done && + python3 -m pip install --upgrade numpy perfetto dataclasses && + python3 -m pip install 'cmake==3.18.4' && + for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install --upgrade numpy perfetto dataclasses; done && apt-get -y --purge autoremove && apt-get -y clean && /opt/conda/bin/conda clean -y -a @@ -408,16 +410,6 @@ jobs: ompt: ['ON'] papi: ['ON'] deps: ['libtbb-dev libboost-{atomic,system,thread,date-time,filesystem,timer}-dev'] - include: - - compiler: 'g++' - mpi: 'nompi' - boost: 'ON' - tbb: 'ON' - build-type: 'Release' - python: 'ON' - ompt: 'OFF' - papi: 'OFF' - deps: '' env: ELFUTILS_DOWNLOAD_VERSION: 0.186 @@ -440,7 +432,8 @@ jobs: wget https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v47.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin && chmod +x /opt/trace_processor/bin/trace_processor_shell && python3 -m pip install --upgrade pip && - python3 -m pip install numpy perfetto dataclasses && + python3 -m pip install --upgrade numpy perfetto dataclasses && + python3 -m pip install 'cmake==3.18.4' && sudo apt-get -y --purge autoremove && sudo apt-get -y clean @@ -592,8 +585,9 @@ jobs: wget 
https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v47.0/linux-amd64/trace_processor_shell -P /opt/trace_processor/bin && chmod +x /opt/trace_processor/bin/trace_processor_shell && python3 -m pip install --upgrade pip && - python3 -m pip install numpy perfetto dataclasses && - for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done && + python3 -m pip install --upgrade numpy perfetto dataclasses && + python3 -m pip install 'cmake==3.18.4' && + for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install --upgrade numpy perfetto dataclasses; done && apt-get -y --purge autoremove && apt-get -y clean && /opt/conda/bin/conda clean -y -a diff --git a/.github/workflows/ubuntu-jammy.yml b/.github/workflows/ubuntu-jammy.yml index 78574f03..b69b0634 100644 --- a/.github/workflows/ubuntu-jammy.yml +++ b/.github/workflows/ubuntu-jammy.yml @@ -111,8 +111,9 @@ jobs: apt-get upgrade -y && apt-get install -y autoconf bison build-essential clang environment-modules gettext libfabric-dev libiberty-dev libomp-dev libopenmpi-dev libtool m4 openmpi-bin python3-pip texinfo ${{ matrix.compiler }} && python3 -m pip install --upgrade pip && - python3 -m pip install numpy perfetto dataclasses && - for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install numpy perfetto dataclasses; done + python3 -m pip install --upgrade numpy perfetto dataclasses && + python3 -m pip install 'cmake==3.18.4' && + for i in 6 7 8 9 10 11; do /opt/conda/envs/py3.${i}/bin/python -m pip install --upgrade numpy perfetto dataclasses; done - name: Install ROCm Packages timeout-minutes: 25 diff --git a/.gitmodules b/.gitmodules index 0b1ce3d0..2aecf69e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "external/timemory"] path = external/timemory - url = https://github.com/NERSC/timemory.git + url = https://github.com/ROCm/timemory.git [submodule "external/perfetto"] path = external/perfetto 
url = https://github.com/google/perfetto.git diff --git a/CMakeLists.txt b/CMakeLists.txt index bd161996..a3e3b60d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) diff --git a/README.md b/README.md index 963d5fc4..c4853014 100755 --- a/README.md +++ b/README.md @@ -132,7 +132,7 @@ export LD_LIBRARY_PATH=/opt/rocprofiler-systems/lib:${LD_LIBRARY_PATH} Generate a rocprofiler-systems configuration file using `rocprof-sys-avail -G rocprof-sys.cfg`. Optionally, use `rocprof-sys-avail -G rocprof-sys.cfg --all` for a verbose configuration file with descriptions, categories, etc. Modify the configuration file as desired, e.g. enable -[perfetto](https://perfetto.dev/), [timemory](https://github.com/NERSC/timemory), sampling, and process-level sampling by default +[perfetto](https://perfetto.dev/), [timemory](https://github.com/ROCm/timemory), sampling, and process-level sampling by default and tweak some sampling default values: ```console diff --git a/cmake/Packages.cmake b/cmake/Packages.cmake index f3383505..eadbfe03 100644 --- a/cmake/Packages.cmake +++ b/cmake/Packages.cmake @@ -677,7 +677,7 @@ mark_as_advanced(TIMEMORY_PROJECT_NAME) rocprofiler_systems_checkout_git_submodule( RELATIVE_PATH external/timemory WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - REPO_URL https://github.com/NERSC/timemory.git + REPO_URL https://github.com/ROCm/timemory.git REPO_BRANCH omnitrace) rocprofiler_systems_save_variables( diff --git a/docs/conceptual/rocprof-sys-feature-set.rst b/docs/conceptual/rocprof-sys-feature-set.rst index b0502ea6..b26e8f13 100644 --- a/docs/conceptual/rocprof-sys-feature-set.rst +++ b/docs/conceptual/rocprof-sys-feature-set.rst @@ -7,7 +7,7 @@ The ROCm Systems Profiler feature set and use cases *************************************** `ROCm Systems 
Profiler `_ is designed to be highly extensible. -Internally, it leverages the `Timemory performance analysis toolkit <https://github.com/NERSC/timemory>`_ +Internally, it leverages the `Timemory performance analysis toolkit <https://github.com/ROCm/timemory>`_ to manage extensions, resources, data, and other items. It supports the following features, modes, metrics, and APIs. diff --git a/docs/how-to/configuring-runtime-options.rst b/docs/how-to/configuring-runtime-options.rst index 99467b13..bc816883 100644 --- a/docs/how-to/configuring-runtime-options.rst +++ b/docs/how-to/configuring-runtime-options.rst @@ -28,7 +28,7 @@ use the ``rocprof-sys-avail -G ~/.rocprof-sys.cfg --all`` option for a verbose configuration file with descriptions, categories, and additional information. Modify ``${HOME}/.rocprof-sys.cfg`` as required. For example, enable `Perfetto <https://perfetto.dev/>`_, -`Timemory <https://github.com/NERSC/timemory>`_, sampling, and process-level sampling by default +`Timemory <https://github.com/ROCm/timemory>`_, sampling, and process-level sampling by default and tweak the default sampling values. .. code-block:: shell @@ -64,7 +64,7 @@ accepts a case insensitive match for nearly all common Boolean logic expressions Exploring components ----------------------------------- -ROCm Systems Profiler uses `Timemory <https://github.com/NERSC/timemory>`_ extensively to provide +ROCm Systems Profiler uses `Timemory <https://github.com/ROCm/timemory>`_ extensively to provide various capabilities and manage data and resources. By default, with ``ROCPROFSYS_PROFILE=ON``, ROCm Systems Profiler only collects wall-clock timing values.
However, by modifying the ``ROCPROFSYS_TIMEMORY_COMPONENTS`` setting, diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 8415afa4..4a2bda45 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) project(rocprofiler-systems-examples LANGUAGES C CXX) diff --git a/examples/causal/CMakeLists.txt b/examples/causal/CMakeLists.txt index 100f868b..27daf654 100644 --- a/examples/causal/CMakeLists.txt +++ b/examples/causal/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) project(rocprofiler-systems-causal-example LANGUAGES CXX) diff --git a/examples/code-coverage/CMakeLists.txt b/examples/code-coverage/CMakeLists.txt index 536fb21a..06fdb936 100644 --- a/examples/code-coverage/CMakeLists.txt +++ b/examples/code-coverage/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.15 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) project(rocprofiler-systems-code-coverage-example LANGUAGES CXX) diff --git a/examples/fork/CMakeLists.txt b/examples/fork/CMakeLists.txt index 5245f69d..4408265f 100644 --- a/examples/fork/CMakeLists.txt +++ b/examples/fork/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.15 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) project(rocprofiler-systems-fork LANGUAGES CXX) diff --git a/examples/lulesh/CMakeLists.txt b/examples/lulesh/CMakeLists.txt index 9c5fe99a..34ce262e 100644 --- a/examples/lulesh/CMakeLists.txt +++ b/examples/lulesh/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) project(rocprofiler-systems-lulesh-example LANGUAGES C CXX) diff --git a/examples/mpi/CMakeLists.txt b/examples/mpi/CMakeLists.txt index aec66705..109c7ed2 100644 --- a/examples/mpi/CMakeLists.txt +++ 
b/examples/mpi/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)
 
 project(rocprofiler-systems-mpi-examples LANGUAGES C CXX)
 
diff --git a/examples/openmp/CMakeLists.txt b/examples/openmp/CMakeLists.txt
index a550dd17..c7451766 100644
--- a/examples/openmp/CMakeLists.txt
+++ b/examples/openmp/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)
 
 project(rocprofiler-systems-openmp LANGUAGES CXX)
 
@@ -56,3 +56,5 @@ if(ROCPROFSYS_INSTALL_EXAMPLES)
         DESTINATION bin
         COMPONENT rocprofiler-systems-examples)
 endif()
+
+add_subdirectory(target)
diff --git a/examples/openmp/target/CMakeLists.txt b/examples/openmp/target/CMakeLists.txt
new file mode 100644
index 00000000..ddb971a1
--- /dev/null
+++ b/examples/openmp/target/CMakeLists.txt
@@ -0,0 +1,110 @@
+# OpenMP target-offload example. It is skipped unless ROCm (HIP) >= 5.6 and an
+# amdclang++ offload compiler are found. Set OMP_TARGET_COMPILER to override the
+# compiler and GPU_TARGETS to choose the --offload-arch values.
+cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)
+
+# try to find a compatible HIP version
+if(ROCmVersion_MAJOR_VERSION AND ROCmVersion_MAJOR_VERSION GREATER_EQUAL 6)
+    find_package(hip ${ROCmVersion_MAJOR_VERSION}.0.0)
+else()
+    find_package(hip)
+endif()
+
+if(NOT hip_FOUND)
+    message(WARNING "ROCm >= 5.6 not found. Skipping OpenMP target example.")
+    return()
+elseif(hip_FOUND AND hip_VERSION VERSION_LESS 5.6.0)
+    message(
+        WARNING
+            "ROCm >= 5.6 not found (found ${hip_VERSION}). Skipping OpenMP target example."
+        )
+    return()
+endif()
+
+if(NOT OMP_TARGET_COMPILER)
+    find_program(
+        amdclangpp_EXECUTABLE
+        NAMES amdclang++
+        HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
+        PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
+        PATH_SUFFIXES bin llvm/bin)
+    mark_as_advanced(amdclangpp_EXECUTABLE)
+
+    if(amdclangpp_EXECUTABLE)
+        set(OMP_TARGET_COMPILER
+            "${amdclangpp_EXECUTABLE}"
+            CACHE FILEPATH "OpenMP target compiler")
+    else()
+        message(WARNING "OpenMP target compiler not found. Skipping this example.")
+        return()
+    endif()
+endif()
+
+project(rocprofiler-systems-example-openmp-target-lib LANGUAGES CXX)
+
+set(CMAKE_BUILD_TYPE "RelWithDebInfo")
+
+set(DEFAULT_GPU_TARGETS
+    "gfx900"
+    "gfx906"
+    "gfx908"
+    "gfx90a"
+    "gfx940"
+    "gfx941"
+    "gfx942"
+    "gfx1030"
+    "gfx1010"
+    "gfx1100"
+    "gfx1101"
+    "gfx1102")
+
+set(GPU_TARGETS
+    "${DEFAULT_GPU_TARGETS}"
+    CACHE STRING "GPU targets to compile for")
+
+find_package(Threads REQUIRED)
+
+add_library(openmp-target-lib SHARED)
+target_sources(openmp-target-lib PRIVATE library.cpp)
+target_link_libraries(openmp-target-lib PUBLIC Threads::Threads)
+target_compile_options(openmp-target-lib PRIVATE -fopenmp -ggdb)
+target_link_options(openmp-target-lib PUBLIC -fopenmp)
+
+foreach(_TARGET ${GPU_TARGETS})
+    target_compile_options(openmp-target-lib PRIVATE --offload-arch=${_TARGET})
+    target_link_options(openmp-target-lib PUBLIC --offload-arch=${_TARGET})
+endforeach()
+
+message(STATUS "Using OpenMP target compiler: ${OMP_TARGET_COMPILER}")
+
+get_filename_component(OMP_TARGET_COMPILER_DIR "${OMP_TARGET_COMPILER}" PATH)
+get_filename_component(OMP_TARGET_COMPILER_DIR "${OMP_TARGET_COMPILER_DIR}" PATH)
+
+message(STATUS "Using OpenMP target compiler directory: ${OMP_TARGET_COMPILER_DIR}")
+
+if(NOT EXISTS "${OMP_TARGET_COMPILER_DIR}/llvm/lib")
+    message(FATAL_ERROR "${OMP_TARGET_COMPILER_DIR}/llvm/lib does not exist")
+endif()
+set_target_properties(
+    openmp-target-lib
+    PROPERTIES BUILD_RPATH
+               "${OMP_TARGET_COMPILER_DIR}/llvm/lib;${OMP_TARGET_COMPILER_DIR}/lib"
+               OUTPUT_NAME "openmp-target"
+               POSITION_INDEPENDENT_CODE ON)
+
+rocprofiler_systems_custom_compilation(TARGET openmp-target-lib COMPILER
+                                       ${OMP_TARGET_COMPILER})
+
+add_executable(openmp-target)
+target_sources(openmp-target PRIVATE main.cpp)
+target_link_libraries(openmp-target PRIVATE openmp-target-lib)
+target_compile_options(openmp-target PRIVATE -ggdb)
+
+set_target_properties(
+    openmp-target
+    PROPERTIES BUILD_RPATH
+               "${OMP_TARGET_COMPILER_DIR}/llvm/lib;${OMP_TARGET_COMPILER_DIR}/lib"
+               POSITION_INDEPENDENT_CODE ON)
+
+rocprofiler_systems_custom_compilation(TARGET openmp-target COMPILER
+                                       ${OMP_TARGET_COMPILER})
diff --git a/examples/openmp/target/library.cpp b/examples/openmp/target/library.cpp
new file mode 100644
index 00000000..580721df
--- /dev/null
+++ b/examples/openmp/target/library.cpp
@@ -0,0 +1,149 @@
+// MIT License
+//
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +constexpr float EPS_FLOAT = 1.0e-7f; +constexpr double EPS_DOUBLE = 1.0e-15; + +#pragma omp declare target +template +T +mul(T a, T b) +{ + T c; + c = a * b; + return c; +} +#pragma omp end declare target + +template +void +vmul(T* a, T* b, T* c, int N) +{ +#pragma omp target map(to : a [0:N], b [0:N]) map(from : c [0:N]) +#pragma omp teams distribute parallel for + for(int i = 0; i < N; i++) + { + c[i] = mul(a[i], b[i]); + } +} + +int +run_impl() +{ + std::this_thread::sleep_for(std::chrono::milliseconds{ 50 }); + + constexpr int N = 100000; + constexpr int Nc = N / 100; + int a_i[N], b_i[N], c_i[N], validate_i[N]; + float a_f[N], b_f[N], c_f[N], validate_f[N]; + double a_d[N], b_d[N], c_d[N], validate_d[N]; + int N_errors = 0; + bool flag = false; + +#pragma omp parallel for schedule(dynamic, Nc) + for(int i = 0; i < N; ++i) + { + a_f[i] = a_i[i] = i + 1; + b_f[i] = b_i[i] = i + 2; + a_d[i] = a_i[i]; + b_d[i] = b_i[i]; + validate_i[i] = a_i[i] * b_i[i]; + validate_f[i] = a_f[i] * b_f[i]; + validate_d[i] = a_d[i] * b_d[i]; + } + + for(int i = 0; i < 2; ++i) + { + vmul(a_i, b_i, c_i, N); + vmul(a_f, b_f, c_f, N); + vmul(a_d, b_d, c_d, N); + } + + for(int i = 0; i < N; i++) + { + if(c_i[i] != validate_i[i]) + { + ++N_errors; + // print 1st bad index + if(!flag) + { + printf("First fail: c_i[%d](%d) != validate_i[%d](%d)\n", i, c_i[i], i, + validate_i[i]); + flag = true; + } + } + } + flag = false; + for(int i = 0; i < N; i++) + { + if(fabs(c_f[i] - validate_f[i]) > EPS_FLOAT) + { + ++N_errors; + // print 1st bad index + if(!flag) + { + printf("First fail: c_f[%d](%f) != validate_f[%d](%f)\n", i, + static_cast(c_f[i]), i, + static_cast(validate_f[i])); + flag = true; + } + } + } + flag = false; + for(int i = 0; i < N; i++) + { + if(fabs(c_d[i] - validate_d[i]) > EPS_DOUBLE) + { + ++N_errors; + // print 1st bad index + if(!flag) + { + printf("First fail: c_d[%d](%f) != 
validate_d[%d](%f)\n", i, c_d[i], i,
+                       validate_d[i]);
+                flag = true;
+            }
+        }
+    }
+
+    return N_errors;
+}
+
+int
+run()
+{
+    int N_errors = 0;  // validation failures summed over every thread below
+#pragma omp parallel reduction(+ : N_errors)
+    {
+        N_errors += run_impl();  // each thread runs and validates its own copy
+    }
+    return N_errors;  // previously always 0, so failures were never reported
+}
diff --git a/examples/openmp/target/main.cpp b/examples/openmp/target/main.cpp
new file mode 100644
index 00000000..081c03b9
--- /dev/null
+++ b/examples/openmp/target/main.cpp
@@ -0,0 +1,52 @@
+// MIT License
+//
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+ +#include +#include +#include +#include +#include +#include +#include +#include + +extern int +run(); + +int +main() +{ + auto N_errors = run(); + auto _ec = EXIT_SUCCESS; + if(N_errors == 0) + { + printf("Success\n"); + } + else + { + printf("Total %d failures\n", N_errors); + printf("Fail\n"); + _ec = EXIT_FAILURE; + } + + return _ec; +} diff --git a/examples/parallel-overhead/CMakeLists.txt b/examples/parallel-overhead/CMakeLists.txt index f8afbfc7..5733a18d 100644 --- a/examples/parallel-overhead/CMakeLists.txt +++ b/examples/parallel-overhead/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) project(rocprofiler-systems-parallel-overhead-example LANGUAGES CXX) diff --git a/examples/python/CMakeLists.txt b/examples/python/CMakeLists.txt index b882031f..c57e4625 100644 --- a/examples/python/CMakeLists.txt +++ b/examples/python/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) project(rocprofiler-systems-python) diff --git a/examples/rccl/CMakeLists.txt b/examples/rccl/CMakeLists.txt index 004a6cbd..799bf6d5 100644 --- a/examples/rccl/CMakeLists.txt +++ b/examples/rccl/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) project(rocprofiler-systems-rccl-example LANGUAGES CXX) diff --git a/examples/rewrite-caller/CMakeLists.txt b/examples/rewrite-caller/CMakeLists.txt index 1245b823..47f3a645 100644 --- a/examples/rewrite-caller/CMakeLists.txt +++ b/examples/rewrite-caller/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.15 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) project(rocprofiler-systems-rewrite-caller-example LANGUAGES CXX) diff --git a/examples/trace-time-window/CMakeLists.txt b/examples/trace-time-window/CMakeLists.txt index 2ab3d671..3b3716d5 100644 --- 
a/examples/trace-time-window/CMakeLists.txt +++ b/examples/trace-time-window/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.15 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) project(rocprofiler-systems-trace-time-window-example LANGUAGES CXX) diff --git a/examples/transpose/CMakeLists.txt b/examples/transpose/CMakeLists.txt index cff00c42..001571f5 100644 --- a/examples/transpose/CMakeLists.txt +++ b/examples/transpose/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) project(rocprofiler-systems-transpose-example LANGUAGES CXX) diff --git a/examples/user-api/CMakeLists.txt b/examples/user-api/CMakeLists.txt index 19e1eee6..f002f662 100644 --- a/examples/user-api/CMakeLists.txt +++ b/examples/user-api/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) project(rocprofiler-systems-user-api-example LANGUAGES CXX) diff --git a/external/timemory b/external/timemory index 9ce43f32..68ce4200 160000 --- a/external/timemory +++ b/external/timemory @@ -1 +1 @@ -Subproject commit 9ce43f32939dd71aa8905f5c72537e9a3ffcc7c2 +Subproject commit 68ce420086bfd73cbf0986c5ad10d811c8934f78 diff --git a/scripts/run-ci.sh b/scripts/run-ci.sh index 78b7cf97..94091dff 100755 --- a/scripts/run-ci.sh +++ b/scripts/run-ci.sh @@ -207,7 +207,7 @@ EOF verbose-run cd ${BINARY_DIR} cat << EOF > dashboard.cmake -cmake_minimum_required(VERSION 3.16 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) include("\${CMAKE_CURRENT_LIST_DIR}/CTestCustom.cmake") diff --git a/scripts/test-find-package.sh b/scripts/test-find-package.sh index d872a3e8..cd0dab61 100755 --- a/scripts/test-find-package.sh +++ b/scripts/test-find-package.sh @@ -112,7 +112,7 @@ verbose-run cp -v -r ${EXAMPLE_DIR}/${EXAMPLE_NAME}/* ${SOURCE_DIR}/ verbose-run pushd ${SOURCE_DIR} cat << EOF > CMakeLists.txt 
-cmake_minimum_required(VERSION 3.16.0 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18.4.0 FATAL_ERROR) project(test LANGUAGES C CXX) diff --git a/scripts/write-rocprof-sys-install.cmake b/scripts/write-rocprof-sys-install.cmake index 6ee620d8..9552722a 100644 --- a/scripts/write-rocprof-sys-install.cmake +++ b/scripts/write-rocprof-sys-install.cmake @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.8) +cmake_minimum_required(VERSION 3.18.4) if(NOT DEFINED ROCPROFSYS_VERSION) file(READ "${CMAKE_CURRENT_LIST_DIR}/../VERSION" FULL_VERSION_STRING LIMIT_COUNT 1) diff --git a/source/lib/rocprof-sys/library/ompt.cpp b/source/lib/rocprof-sys/library/ompt.cpp index 6cb66c38..eac1d337 100644 --- a/source/lib/rocprof-sys/library/ompt.cpp +++ b/source/lib/rocprof-sys/library/ompt.cpp @@ -30,27 +30,171 @@ #if defined(ROCPROFSYS_USE_OMPT) && ROCPROFSYS_USE_OMPT > 0 +# include "binary/link_map.hpp" # include "core/components/fwd.hpp" # include "library/components/category_region.hpp" +# include "library/tracing.hpp" # include +# include +# include +# include # include +# include # include # include +# include +# include +# include +# include +# include +# include # include +# include +# include -using api_t = TIMEMORY_API; -using ompt_handle_t = tim::component::ompt_handle; -using ompt_context_t = tim::openmp::context_handler; -using ompt_toolset_t = typename ompt_handle_t::toolset_type; -using ompt_bundle_t = tim::component_tuple; +using api_t = tim::project::rocprofsys; -extern "C" +namespace rocprofsys { - ompt_start_tool_result_t* ompt_start_tool(unsigned int, - const char*) ROCPROFSYS_PUBLIC_API; -} +namespace component +{ +struct ompt : comp::base +{ + using value_type = void; + using base_type = comp::base; + using context_info_t = tim::openmp::context_info; + + static std::string label() { return "ompt"; } + static std::string description() { return "OpenMP tools tracing"; } + + ompt() = default; + ~ompt() = default; + ompt(const ompt&) = default; + ompt(ompt&&) 
noexcept = default; + + ompt& operator=(const ompt&) = default; + ompt& operator=(ompt&&) noexcept = default; + + template + void start(const context_info_t& _ctx_info, Args&&...) const + { + category_region::start(m_prefix); + + auto _ts = tracing::now(); + uint64_t _cid = + (_ctx_info.target_arguments) ? _ctx_info.target_arguments->host_op_id : 0; + auto _annotate = [&](::perfetto::EventContext ctx) { + if(config::get_perfetto_annotations()) + { + tracing::add_perfetto_annotation(ctx, "begin_ns", _ts); + for(const auto& itr : _ctx_info.arguments) + tracing::add_perfetto_annotation(ctx, itr.label, itr.value); + } + }; + + if(_cid > 0) + { + category_region::start( + (_ctx_info.func.empty()) ? m_prefix : _ctx_info.func, _ts, + ::perfetto::Flow::ProcessScoped(_cid), std::move(_annotate)); + } + else + { + category_region::start( + (_ctx_info.func.empty()) ? m_prefix : _ctx_info.func, _ts, + std::move(_annotate)); + } + } + + template + void stop(const context_info_t& _ctx_info, Args&&...) const + { + category_region::stop(m_prefix); + + auto _ts = tracing::now(); + uint64_t _cid = + (_ctx_info.target_arguments) ? _ctx_info.target_arguments->host_op_id : 0; + auto _annotate = [&](::perfetto::EventContext ctx) { + if(config::get_perfetto_annotations()) + { + tracing::add_perfetto_annotation(ctx, "end_ns", _ts); + for(const auto& itr : _ctx_info.arguments) + tracing::add_perfetto_annotation(ctx, itr.label, itr.value); + } + }; + + if(_cid > 0) + { + category_region::stop( + (_ctx_info.func.empty()) ? m_prefix : _ctx_info.func, _ts, + std::move(_annotate)); + } + else + { + category_region::stop( + (_ctx_info.func.empty()) ? m_prefix : _ctx_info.func, _ts, + std::move(_annotate)); + } + } + + template + void store(const context_info_t& _ctx_info, Args&&... 
_args) const + { + start(_ctx_info, std::forward(_args)...); + stop(_ctx_info, std::forward(_args)...); + } + + static void record(std::string_view name, ompt_id_t id, uint64_t beg_time, + uint64_t end_time, uint64_t thrd_id, uint64_t targ_id, + const context_info_t& common) + { + (void) thrd_id; + (void) targ_id; + + auto _annotate = [&](::perfetto::EventContext ctx) { + if(config::get_perfetto_annotations()) + { + for(const auto& itr : common.arguments) + tracing::add_perfetto_annotation(ctx, itr.label, itr.value); + } + }; + + auto _track = tracing::get_perfetto_track( + category::ompt{}, + [](uint64_t _targ_id_v) { + return ::timemory::join::join("", "OMP Target ", _targ_id_v); + }, + targ_id); + + category_region::start( + name, _track, beg_time, ::perfetto::Flow::ProcessScoped(id), + std::move(_annotate)); + + category_region::stop(name, _track, + end_time); + } + + void set_prefix(std::string_view _v) { m_prefix = _v; } + +private: + std::string_view m_prefix = {}; +}; +} // namespace component +} // namespace rocprofsys + +namespace tim +{ +namespace trait +{ +template <> +struct ompt_handle +{ + using type = component_tuple<::rocprofsys::component::ompt>; +}; +} // namespace trait +} // namespace tim namespace rocprofsys { @@ -58,6 +202,11 @@ namespace ompt { namespace { +using ompt_handle_t = tim::component::ompt_handle; +using ompt_context_t = tim::openmp::context_handler; +using ompt_toolset_t = typename ompt_handle_t::toolset_type; +using ompt_bundle_t = tim::component_tuple; + std::unique_ptr f_bundle = {}; bool _init_toolset_off = (trait::runtime_enabled::set(false), trait::runtime_enabled::set(false), true); @@ -70,10 +219,7 @@ setup() if(!tim::settings::enabled()) return; trait::runtime_enabled::set(true); trait::runtime_enabled::set(true); - comp::user_ompt_bundle::global_init(); - comp::user_ompt_bundle::reset(); tim::auto_lock_t lk{ tim::type_mutex() }; - comp::user_ompt_bundle::configure>(); f_bundle = std::make_unique("rocprofsys/ompt", 
quirk::config{}); } @@ -91,10 +237,15 @@ shutdown() ompt_context_t::cleanup(); trait::runtime_enabled::set(false); trait::runtime_enabled::set(false); - comp::user_ompt_bundle::reset(); pthread_gotcha::shutdown(); // call the OMPT finalize callback - if(f_finalize) (*f_finalize)(); + if(f_finalize) + { + for(const auto& itr : tim::openmp::get_ompt_device_functions()) + if(itr.second.stop_trace) itr.second.stop_trace(itr.second.device); + (*f_finalize)(); + f_finalize = nullptr; + } } f_bundle.reset(); _protect = false; @@ -115,21 +266,232 @@ tool_initialize(ompt_function_lookup_t lookup, int initial_device_num, { if(!rocprofsys::settings_are_configured()) { - ROCPROFSYS_BASIC_WARNING( + ROCPROFSYS_BASIC_WARNING_F( 0, "[%s] invoked before rocprof-sys was initialized. In instrumentation mode, " "settings exported to the environment have not been propagated yet...\n", __FUNCTION__); - rocprofsys::configure_settings(); + use_tool() = get_env("ROCPROFSYS_USE_OMPT", true, false); + } + else + { + use_tool() = rocprofsys::config::get_use_ompt(); } - use_tool() = rocprofsys::config::get_use_ompt(); if(use_tool()) { - TIMEMORY_PRINTF(stderr, "OpenMP-tools configuring for initial device %i\n\n", - initial_device_num); - f_finalize = tim::ompt::configure( - lookup, initial_device_num, tool_data); + ROCPROFSYS_BASIC_VERBOSE_F( + 2, "OpenMP-tools configuring for initial device %i\n\n", initial_device_num); + + static auto _generate_key = [](std::string_view _key_v, + const ::tim::openmp::argument_array_t& _args_v) { + return std::string{ _key_v }; + (void) _args_v; + }; + + tim::openmp::get_codeptr_ra_resolver() = + [](tim::openmp::context_info& _ctx_info) { + const auto& _key = _ctx_info.label; + const auto* codeptr_ra = _ctx_info.codeptr_ra; + auto& _args = _ctx_info.arguments; + + ROCPROFSYS_BASIC_VERBOSE(2, "resolving codeptr return address for %s\n", + _key.data()); + + if(!codeptr_ra) return _generate_key(_key, _args); + + static thread_local auto _once = 
std::once_flag{}; + std::call_once(_once, []() { ::tim::unwind::update_file_maps(); }); + + auto _info = ::rocprofsys::binary::lookup_ipaddr_entry( + reinterpret_cast(codeptr_ra)); + + if(_info) + { + _ctx_info.func = tim::demangle(_info->name); + if(_info->lineno > 0) + { + auto _linfo = _info->lineinfo.rget([](const auto& _v) -> bool { + return (_v && !_v.location.empty() && _v.line > 0); + }); + + if(_linfo) + { + _ctx_info.file = _linfo.location; + _ctx_info.line = _linfo.line; + _args.emplace_back("file", _ctx_info.file); + _args.emplace_back("lineinfo", + ::timemory::join::join("@", _ctx_info.file, + _ctx_info.line)); + } + else + { + _ctx_info.file = _info->location; + _args.emplace_back("file", _ctx_info.file); + } + + return _generate_key( + ::timemory::join::join(" @ ", _key, _ctx_info.func), _args); + } + else + { + return _generate_key( + ::timemory::join::join(" @ ", _key, _ctx_info.func), _args); + } + } + else + { + auto _dl_info = Dl_info{ nullptr, nullptr, nullptr, nullptr }; + if(dladdr(codeptr_ra, &_dl_info) != 0) + { + _ctx_info.file = _dl_info.dli_fname; + _ctx_info.func = tim::demangle(_dl_info.dli_sname); + _args.emplace_back("file", _ctx_info.file); + return _generate_key( + ::timemory::join::join( + " @ ", _key, + ::timemory::join::join("", _ctx_info.func, " [", + _ctx_info.file, "]")), + _args); + } + } + + // since no line info could be deduced, include the codeptr return address + auto _args_codeptr_v = _args; + _args_codeptr_v.emplace_back("codeptr_ra", codeptr_ra); + return _generate_key(_key, _args_codeptr_v); + }; + + tim::openmp::get_function_lookup_callback< + api_t>() = [](ompt_function_lookup_t, + const std::optional& + params) { + if(!params) return; + + ROCPROFSYS_VERBOSE(3, "[ompt] configuring device %i...\n", + params->device_num); + + auto& device_funcs = + tim::openmp::get_ompt_device_functions().at(params->device_num); + + device_funcs.set_trace_ompt(params->device, 1, ompt_callback_target_data_op); + 
device_funcs.set_trace_ompt(params->device, 1, ompt_callback_target_submit); + + static ompt_callback_buffer_request_t request = + [](int device_num, ompt_buffer_t** buffer, size_t* bytes) { + ROCPROFSYS_VERBOSE(3, "[ompt] buffer request...\n"); + *bytes = ::tim::units::get_page_size(); + *buffer = mmap(nullptr, *bytes, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + (void) device_num; + }; + + static ompt_callback_buffer_complete_t complete = [](int device_num, + ompt_buffer_t* buffer, + size_t bytes, + ompt_buffer_cursor_t + begin, + int buffer_owned) { + ROCPROFSYS_VERBOSE(3, "[ompt] buffer complete...\n"); + tim::consume_parameters(device_num, buffer, bytes, begin, buffer_owned); + + auto _funcs = + tim::openmp::get_ompt_device_functions().at(device_num); + auto _skew = rocprofsys::tracing::get_clock_skew( + [&_funcs]() { return _funcs.get_device_time(_funcs.device); }); + + ompt_buffer_cursor_t _cursor = begin; + size_t _nrecords = 0; + do + { + if(_cursor == 0) break; + ++_nrecords; + auto* _record = _funcs.get_record_ompt(buffer, _cursor); + if(_record) + { + const char* _type = tim::openmp::get_enum_label(_record->type); + auto _thrd_id = _record->thread_id; + auto _targ_id = _record->target_id; + + unsigned long beg_time = _record->time + _skew; + unsigned long end_time = 0; + ompt_id_t id = 0; + const char* _name = tim::openmp::get_enum_label(_record->type); + + if(_record->type == ompt_callback_target_submit) + { + auto& _data = _record->record.target_kernel; + end_time = _data.end_time + _skew; + id = _data.host_op_id; + + auto _ctx_info = tim::openmp::argument_array_t{ + { "begin_ns", beg_time }, + { "end_ns", end_time }, + { "type", _type }, + { "thread_id", _thrd_id }, + { "target_id", _targ_id }, + { "host_op_id", id }, + { "requested_num_teams", _data.requested_num_teams }, + { "granted_num_teams", _data.granted_num_teams } + }; + + component::ompt::record( + _name, id, beg_time, end_time, _thrd_id, _targ_id, + 
tim::openmp::context_info{ _name, nullptr, _ctx_info }); + } + else if(_record->type == ompt_callback_target_data_op) + { + auto& _data = _record->record.target_data_op; + end_time = _data.end_time + _skew; + id = _data.host_op_id; + const auto* _opname = + tim::openmp::get_enum_label(_data.optype); + + auto _ctx_info = tim::openmp::argument_array_t{ + { "begin_ns", beg_time }, + { "end_ns", end_time }, + { "type", _type }, + { "thread_id", _thrd_id }, + { "target_id", _targ_id }, + { "host_op_id", id }, + { "optype", _opname }, + { "src_addr", reinterpret_cast(_data.src_addr) }, + { "dst_addr", reinterpret_cast(_data.dest_addr) }, + { "src_device_num", _data.src_device_num }, + { "dst_device_num", _data.dest_device_num }, + { "bytes", _data.bytes }, + }; + + component::ompt::record( + _opname, id, beg_time, end_time, _thrd_id, _targ_id, + tim::openmp::context_info{ _name, nullptr, _ctx_info }); + } + + ROCPROFSYS_VERBOSE( + 3, + "type=%i, type_name=%s, start=%lu, end=%lu, delta=%lu, " + "tid=%lu, target_id=%lu, host_id=%lu\n", + _record->type, tim::openmp::get_enum_label(_record->type), + beg_time, end_time, (end_time - beg_time), _record->thread_id, + _record->target_id, id); + } + + _funcs.advance_buffer_cursor(_funcs.device, buffer, bytes, _cursor, + &_cursor); + } while(_cursor != 0); + + ROCPROFSYS_VERBOSE(3, "[ompt] number of records: %zu\n", _nrecords); + + if(buffer_owned == 1) + { + ::munmap(buffer, bytes); + } + }; + + device_funcs.start_trace(params->device, request, complete); + }; + + f_finalize = tim::ompt::configure(lookup, initial_device_num, tool_data); } return 1; // success } @@ -143,18 +505,23 @@ tool_finalize(ompt_data_t*) } // namespace ompt } // namespace rocprofsys -extern "C" ompt_start_tool_result_t* -ompt_start_tool(unsigned int omp_version, const char* runtime_version) +extern "C" { - ROCPROFSYS_BASIC_VERBOSE_F(0, "OpenMP version: %u, runtime version: %s\n", - omp_version, runtime_version); - ROCPROFSYS_METADATA("OMP_VERSION", 
omp_version); - ROCPROFSYS_METADATA("OMP_RUNTIME_VERSION", runtime_version); + ompt_start_tool_result_t* ompt_start_tool(unsigned int, + const char*) ROCPROFSYS_PUBLIC_API; - static auto* data = new ompt_start_tool_result_t{ &rocprofsys::ompt::tool_initialize, - &rocprofsys::ompt::tool_finalize, - { 0 } }; - return data; + ompt_start_tool_result_t* ompt_start_tool(unsigned int omp_version, + const char* runtime_version) + { + ROCPROFSYS_BASIC_VERBOSE_F(0, "OpenMP version: %u, runtime version: %s\n", + omp_version, runtime_version); + ROCPROFSYS_METADATA("OMP_VERSION", omp_version); + ROCPROFSYS_METADATA("OMP_RUNTIME_VERSION", runtime_version); + static auto* data = new ompt_start_tool_result_t{ + &rocprofsys::ompt::tool_initialize, &rocprofsys::ompt::tool_finalize, { 0 } + }; + return data; + } } #else diff --git a/source/lib/rocprof-sys/library/roctracer.cpp b/source/lib/rocprof-sys/library/roctracer.cpp index daf6bb8a..b7abcf71 100644 --- a/source/lib/rocprof-sys/library/roctracer.cpp +++ b/source/lib/rocprof-sys/library/roctracer.cpp @@ -163,55 +163,18 @@ get_clock_skew() static auto _use = tim::get_env("ROCPROFSYS_USE_ROCTRACER_CLOCK_SKEW", true); if(!_use) return 0; static auto _v = []() { - namespace cpu = tim::cpu; - // synchronize timestamps - // We'll take a CPU timestamp before and after taking a GPU timestmp, then - // take the average of those two, hoping that it's roughly at the same time - // as the GPU timestamp. 
- static auto _cpu_now = []() { - cpu::fence(); - return comp::wall_clock::record(); - }; - - static auto _gpu_now = []() { - cpu::fence(); + auto _gpu_now = []() { uint64_t _ts = 0; - ROCPROFSYS_ROCTRACER_CALL(roctracer_get_timestamp(&_ts)); + roctracer_get_timestamp(&_ts); return _ts; }; - do - { - // warm up cache and allow for any static initialization - (void) _cpu_now(); - (void) _gpu_now(); - } while(false); - - auto _compute = [](volatile uint64_t& _cpu_ts, volatile uint64_t& _gpu_ts) { - _cpu_ts = 0; - _gpu_ts = 0; - _cpu_ts += _cpu_now() / 2; - _gpu_ts += _gpu_now() / 1; - _cpu_ts += _cpu_now() / 2; - return static_cast(_cpu_ts) - static_cast(_gpu_ts); - }; - constexpr int64_t _n = 10; - int64_t _cpu_ave = 0; - int64_t _gpu_ave = 0; - int64_t _diff = 0; - for(int64_t i = 0; i < _n; ++i) - { - volatile uint64_t _cpu_ts = 0; - volatile uint64_t _gpu_ts = 0; - _diff += _compute(_cpu_ts, _gpu_ts); - _cpu_ave += _cpu_ts / _n; - _gpu_ave += _gpu_ts / _n; - } - ROCPROFSYS_BASIC_VERBOSE(2, "CPU timestamp: %li\n", _cpu_ave); - ROCPROFSYS_BASIC_VERBOSE(2, "HIP timestamp: %li\n", _gpu_ave); + // discard (warm-up) + (void) tracing::get_clock_skew(_gpu_now, 1); + + auto _diff = tracing::get_clock_skew(_gpu_now, 10); ROCPROFSYS_BASIC_VERBOSE(1, "CPU/HIP timestamp skew: %li (used: %s)\n", _diff, _use ? 
"yes" : "no"); - _diff /= _n; return _diff; }(); return _v; diff --git a/source/lib/rocprof-sys/library/tracing.hpp b/source/lib/rocprof-sys/library/tracing.hpp index 3db7fe5a..624d30f8 100644 --- a/source/lib/rocprof-sys/library/tracing.hpp +++ b/source/lib/rocprof-sys/library/tracing.hpp @@ -651,5 +651,41 @@ mark_perfetto_track(CategoryT, const char* name, ::perfetto::Track _track, uint6 TRACE_EVENT_INSTANT(trait::name::value, ::perfetto::DynamicString{ name }, _track, _ts, std::forward(args)...); } + +template +int64_t +get_clock_skew(FuncT&& _timestamp_func, int64_t _n = 1) +{ + namespace cpu = tim::cpu; + // synchronize timestamps + // We'll take a CPU timestamp before and after taking a GPU timestmp, then + // take the average of those two, hoping that it's roughly at the same time + // as the GPU timestamp. + auto _cpu_now = []() { + cpu::fence(); + return now(); + }; + + auto _gpu_now = [&_timestamp_func]() { + cpu::fence(); + return std::forward(_timestamp_func)(); + }; + + auto _compute = [&_cpu_now, &_gpu_now]() { + volatile uint64_t _cpu_ts = 0; + volatile uint64_t _gpu_ts = 0; + _cpu_ts += _cpu_now(); + _gpu_ts += _gpu_now(); + _cpu_ts += _cpu_now(); + return static_cast(_cpu_ts / 2) - static_cast(_gpu_ts); + }; + + int64_t _diff = 0; + for(int64_t i = 0; i < _n; ++i) + { + _diff += _compute(); + } + return (_diff / _n); +} } // namespace tracing } // namespace rocprofsys From cef228bfbd9a0e44e5f73a8230c2082bb78edda2 Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Fri, 8 Nov 2024 18:49:42 -0500 Subject: [PATCH 07/20] Update cmake version installed in dockerfiles (#25) * Update cmake version installed in dockerfiles * Standardize the cmake_minimum_required to 3.18.4 across dockerfiles * Fix link to perl repo in opensuse docker. 
--------- Signed-off-by: David Galiffi --- docker/Dockerfile.opensuse | 8 +++++--- docker/Dockerfile.opensuse.ci | 3 ++- docker/Dockerfile.rhel | 6 ++++-- docker/Dockerfile.rhel.ci | 3 ++- docker/Dockerfile.ubuntu | 6 +++++- docker/Dockerfile.ubuntu.ci | 3 ++- 6 files changed, 20 insertions(+), 9 deletions(-) diff --git a/docker/Dockerfile.opensuse b/docker/Dockerfile.opensuse index 97a55c0d..2dbb1b5d 100644 --- a/docker/Dockerfile.opensuse +++ b/docker/Dockerfile.opensuse @@ -23,14 +23,16 @@ RUN set +e; \ RUN zypper --non-interactive update -y && \ zypper --non-interactive dist-upgrade -y && \ zypper --non-interactive install -y -t pattern devel_basis && \ - zypper --non-interactive install -y binutils-gold cmake curl dpkg-devel gcc-c++ git libnuma-devel openmpi3-devel python3-pip rpm-build wget + zypper --non-interactive install -y binutils-gold cmake curl dpkg-devel \ + gcc-c++ git libnuma-devel openmpi3-devel python3-pip rpm-build wget && \ + python3 -m pip install 'cmake==3.18.4' ARG ROCM_VERSION=0.0 ARG AMDGPU_RPM=6.2/sle/15.6/amdgpu-install-6.2.60200-1.noarch.rpm -ARG PERL_REPO=SLE_15 RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \ - zypper --non-interactive addrepo https://mirrorcache-us.opensuse.org/repositories/devel:/languages:/perl/${PERL_REPO}/devel:languages:perl.repo && \ + OS_VERSION_MINOR=$(cat /etc/os-release | grep 'VERSION_ID' | sed 's/=/ /1' | awk '{print $NF}' | sed 's/"//g' | sed 's/\./ /g' | awk '{print $2}') && \ + zypper --non-interactive addrepo https://download.opensuse.org/repositories/devel:languages:perl/15.${OS_VERSION_MINOR}/devel:languages:perl.repo && \ zypper --non-interactive --no-gpg-checks install -y https://repo.radeon.com/amdgpu-install/${AMDGPU_RPM} && \ zypper --non-interactive --gpg-auto-import-keys refresh && \ zypper --non-interactive install -y rocm-dev rocm-smi-lib roctracer-dev rocprofiler-dev rccl-devel libpciaccess0 && \ diff --git a/docker/Dockerfile.opensuse.ci b/docker/Dockerfile.opensuse.ci index 
2169026e..9d7ac120 100644 --- a/docker/Dockerfile.opensuse.ci +++ b/docker/Dockerfile.opensuse.ci @@ -30,7 +30,8 @@ RUN zypper --non-interactive update -y && \ zypper --non-interactive install -y binutils-gold cmake curl dpkg-devel \ gcc-c++ git libnuma-devel openmpi3-devel papi-devel python3-pip \ rpm-build wget && \ - zypper --non-interactive clean --all + zypper --non-interactive clean --all && \ + python3 -m pip install 'cmake==3.18.4' COPY ./dyninst-source /tmp/dyninst diff --git a/docker/Dockerfile.rhel b/docker/Dockerfile.rhel index 6808dc4b..298d69ed 100644 --- a/docker/Dockerfile.rhel +++ b/docker/Dockerfile.rhel @@ -15,8 +15,10 @@ ENV LIBRARY_PATH ${LIBRARY_PATH}:/opt/amdgpu/lib64 RUN yum groupinstall -y "Development Tools" && \ yum install -y epel-release && crb enable && \ - yum install -y --allowerasing cmake curl dpkg-devel numactl-devel openmpi-devel papi-devel python3-pip texinfo wget which zlib-devel && \ - yum clean all + yum install -y --allowerasing cmake curl dpkg-devel numactl-devel openmpi-devel \ + papi-devel python3-pip texinfo wget which zlib-devel && \ + yum clean all && \ + python3 -m pip install 'cmake==3.18.4' ARG ROCM_VERSION=0.0 ARG AMDGPU_RPM=6.2/rhel/9.4/amdgpu-install-6.2.60202-1.el9.noarch.rpm diff --git a/docker/Dockerfile.rhel.ci b/docker/Dockerfile.rhel.ci index b2261c8f..2429a2bb 100644 --- a/docker/Dockerfile.rhel.ci +++ b/docker/Dockerfile.rhel.ci @@ -21,7 +21,8 @@ RUN yum groupinstall -y "Development Tools" && \ yum install -y epel-release && crb enable && \ yum install -y --allowerasing cmake curl dpkg-devel numactl-devel \ openmpi-devel papi-devel python3-pip texinfo wget which zlib-devel && \ - yum clean all + yum clean all && \ + python3 -m pip install 'cmake==3.18.4' COPY ./dyninst-source /tmp/dyninst diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu index 10f428cc..02e5c58f 100644 --- a/docker/Dockerfile.ubuntu +++ b/docker/Dockerfile.ubuntu @@ -24,7 +24,11 @@ ENV PATH ${HOME}/.local/bin:${PATH} RUN 
apt-get update && \ apt-get dist-upgrade -y && \ - apt-get install -y apt-utils autoconf autotools-dev bash-completion bison build-essential cmake curl git-core gnupg2 libnuma1 libopenmpi-dev libpapi-dev libpfm4-dev librpm-dev libtool libudev1 lsb-release m4 python3-pip rpm texinfo wget + apt-get install -y apt-utils autoconf autotools-dev bash-completion bison \ + build-essential cmake curl git-core gnupg2 libnuma1 libopenmpi-dev \ + libpapi-dev libpfm4-dev librpm-dev libtool libudev1 lsb-release m4 \ + python3-pip rpm texinfo wget && \ + python3 -m pip install 'cmake==3.18.4' RUN if [ "${ROCM_VERSION}" != "0.0" ]; then \ wget https://repo.radeon.com/amdgpu-install/${ROCM_VERSION}/ubuntu/${ROCM_REPO_DIST}/${AMDGPU_DEB} && \ diff --git a/docker/Dockerfile.ubuntu.ci b/docker/Dockerfile.ubuntu.ci index 755db147..c505bc32 100644 --- a/docker/Dockerfile.ubuntu.ci +++ b/docker/Dockerfile.ubuntu.ci @@ -29,7 +29,8 @@ RUN apt-get update && \ bzip2 cmake curl environment-modules git-core gnupg2 gzip libiberty-dev \ libpapi-dev libpfm4-dev libtool locales lsb-release m4 python3-pip texinfo \ unzip wget zip zlib1g-dev && \ - apt-get autoclean + apt-get autoclean && \ + python3 -m pip install 'cmake==3.18.4' COPY ./dyninst-source /tmp/dyninst From 3d10efc4113395aaf551b6ad0db8e4822fdedcd5 Mon Sep 17 00:00:00 2001 From: ajanicijamd Date: Sun, 10 Nov 2024 13:33:57 -0500 Subject: [PATCH 08/20] Changed libdir for external libraries built with autotools (#24) --- .gitmodules | 2 +- cmake/ElfUtils.cmake | 2 +- cmake/PAPI.cmake | 1 + external/dyninst | 2 +- external/timemory | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.gitmodules b/.gitmodules index 2aecf69e..2515e865 100644 --- a/.gitmodules +++ b/.gitmodules @@ -9,7 +9,7 @@ url = https://github.com/jrmadsen/ELFIO.git [submodule "external/dyninst"] path = external/dyninst - url = https://github.com/jrmadsen/dyninst.git + url = https://github.com/ROCm/dyninst.git [submodule "external/PTL"] path = external/PTL 
url = https://github.com/jrmadsen/PTL.git diff --git a/cmake/ElfUtils.cmake b/cmake/ElfUtils.cmake index 8a829557..15d46bae 100644 --- a/cmake/ElfUtils.cmake +++ b/cmake/ElfUtils.cmake @@ -112,7 +112,7 @@ externalproject_add( CXXFLAGS=-fPIC\ -O3\ -Wno-error=null-dereference [=[LDFLAGS=-Wl,-rpath='$$ORIGIN']=] /configure --enable-install-elfh --prefix=${_eu_root} --disable-libdebuginfod --disable-debuginfod --disable-nls - --enable-thread-safety --enable-silent-rules + --enable-thread-safety --enable-silent-rules --libdir=${_eu_root}/lib BUILD_COMMAND ${MAKE_COMMAND} install -s BUILD_BYPRODUCTS "${_eu_build_byproducts}" INSTALL_COMMAND "") diff --git a/cmake/PAPI.cmake b/cmake/PAPI.cmake index 5d0d0f45..7c933669 100644 --- a/cmake/PAPI.cmake +++ b/cmake/PAPI.cmake @@ -211,6 +211,7 @@ externalproject_add( --prefix=${ROCPROFSYS_PAPI_INSTALL_DIR} --with-static-lib=yes --with-shared-lib=no --with-perf-events --with-tests=no --with-components=${_ROCPROFSYS_PAPI_COMPONENTS} + --libdir=${ROCPROFSYS_PAPI_INSTALL_DIR}/lib CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation ${ROCPROFSYS_PAPI_EXTRA_ENV} ${MAKE_EXECUTABLE} static install -s BUILD_COMMAND ${CMAKE_COMMAND} -E env CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation diff --git a/external/dyninst b/external/dyninst index 3d1942de..ccf4c247 160000 --- a/external/dyninst +++ b/external/dyninst @@ -1 +1 @@ -Subproject commit 3d1942dee70a0008a7bb9e5f18d8b4abc0a91290 +Subproject commit ccf4c247497a24742e418883350df5beb74ecec9 diff --git a/external/timemory b/external/timemory index 68ce4200..ba3c6486 160000 --- a/external/timemory +++ b/external/timemory @@ -1 +1 @@ -Subproject commit 68ce420086bfd73cbf0986c5ad10d811c8934f78 +Subproject commit ba3c648677b3c6f217abe147ef3198f36239e234 From 51446f715f0b40e1c9a51b345bb1a5eb0a3dc37f Mon Sep 17 00:00:00 2001 From: Pran Swarup Date: Wed, 13 Nov 2024 15:02:43 -0500 Subject: [PATCH 09/20] =?UTF-8?q?Fix=20GPU=20resource=20data=20of=20GPU=20?= 
=?UTF-8?q?power=20and=20temperature=20is=20not=20present=20on=20=E2=80=A6?= =?UTF-8?q?=20(#23)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix GPU resource data of GPU power and temperature is not present on MI300A traces --- source/lib/rocprof-sys/library/rocm_smi.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/source/lib/rocprof-sys/library/rocm_smi.cpp b/source/lib/rocprof-sys/library/rocm_smi.cpp index 73373a03..202a8cd8 100644 --- a/source/lib/rocprof-sys/library/rocm_smi.cpp +++ b/source/lib/rocprof-sys/library/rocm_smi.cpp @@ -153,9 +153,10 @@ data::sample(uint32_t _dev_id) ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).busy, rsmi_dev_busy_percent_get, _dev_id, &m_busy_perc); ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).temp, rsmi_dev_temp_metric_get, _dev_id, - RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &m_temp); - ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).power, rsmi_dev_power_ave_get, _dev_id, 0, - &m_power); + RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &m_temp); + RSMI_POWER_TYPE power_type = RSMI_CURRENT_POWER; + ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).power, rsmi_dev_power_get, _dev_id, + &m_power, &power_type) ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).mem_usage, rsmi_dev_memory_usage_get, _dev_id, RSMI_MEM_TYPE_VRAM, &m_mem_usage); From ba0efacd3f16c1d803f20e4527b546261943adab Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Thu, 14 Nov 2024 13:26:52 -0500 Subject: [PATCH 10/20] Execute arguments via eval (#28) Porting https://github.com/ROCm/omnitrace/pull/410. As eval builtin interprets its arguments in the same way as shell would do, which would need some escape work, otherwise, it won't work if the input arguments contains e.g. 
a JSON string: omnitrace-python -- ./test.py --json='{"foo": "bar"}' --- cmake/Templates/console-script.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Templates/console-script.in b/cmake/Templates/console-script.in index 039de3e4..76a5fafb 100755 --- a/cmake/Templates/console-script.in +++ b/cmake/Templates/console-script.in @@ -14,4 +14,4 @@ run-script() eval $@ } -run-script ${PYTHON_EXECUTABLE} -m @SCRIPT_SUBMODULE@ $@ +run-script ${PYTHON_EXECUTABLE} -m @SCRIPT_SUBMODULE@ "$(printf ' %q' "$@")" From bf1f1e64d61c04f627b8710fb4620f0573628b6b Mon Sep 17 00:00:00 2001 From: cfallows-amd Date: Fri, 15 Nov 2024 17:16:05 -0500 Subject: [PATCH 11/20] Run papi lib configure step as nonparallel during build (#29) Limit to single job during the configure step of rocprofiler-systems-papi-build; error due to files not being available in time if number of jobs is too low. Signed-off-by: Carrie Fallows --- cmake/PAPI.cmake | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cmake/PAPI.cmake b/cmake/PAPI.cmake index 7c933669..780dd101 100644 --- a/cmake/PAPI.cmake +++ b/cmake/PAPI.cmake @@ -45,6 +45,9 @@ if(NOT EXISTS "${ROCPROFSYS_PAPI_INSTALL_DIR}") ${ROCPROFSYS_PAPI_INSTALL_DIR}/lib/libpfm.so) endif() +# Set ROCPROFSYS_PAPI_CONFIGURE_JOBS for commands that need to be run nonparallel +set(ROCPROFSYS_PAPI_CONFIGURE_JOBS 1) + rocprofiler_systems_add_option(ROCPROFSYS_PAPI_AUTO_COMPONENTS "Automatically enable components" OFF) @@ -212,8 +215,10 @@ externalproject_add( --with-perf-events --with-tests=no --with-components=${_ROCPROFSYS_PAPI_COMPONENTS} --libdir=${ROCPROFSYS_PAPI_INSTALL_DIR}/lib - CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation - ${ROCPROFSYS_PAPI_EXTRA_ENV} ${MAKE_EXECUTABLE} static install -s + CONFIGURE_COMMAND + ${CMAKE_COMMAND} -E env CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation + ${ROCPROFSYS_PAPI_EXTRA_ENV} ${MAKE_EXECUTABLE} static install -s -j + 
${ROCPROFSYS_PAPI_CONFIGURE_JOBS} BUILD_COMMAND ${CMAKE_COMMAND} -E env CFLAGS=-fPIC\ -O3\ -Wno-stringop-truncation ${ROCPROFSYS_PAPI_EXTRA_ENV} ${MAKE_EXECUTABLE} utils install-utils -s INSTALL_COMMAND "" From 3c1db92d9e45371b5fd948dc02059cf545a9fdfc Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Thu, 14 Nov 2024 23:09:39 -0500 Subject: [PATCH 12/20] Weekly promotion of staging to mainline Perform an ff-only merge of `amd-staging` into an `amd-mainline` staging branch and open a pull request. --- .github/workflows/opensuse.yml | 2 ++ .github/workflows/redhat.yml | 2 ++ .github/workflows/ubuntu-focal.yml | 2 ++ .github/workflows/ubuntu-jammy.yml | 2 ++ .github/workflows/weekly-mainline-sync.yml | 26 ++++++++++++++++++++++ 5 files changed, 34 insertions(+) create mode 100644 .github/workflows/weekly-mainline-sync.yml diff --git a/.github/workflows/opensuse.yml b/.github/workflows/opensuse.yml index f6de06e6..89ae9ba7 100644 --- a/.github/workflows/opensuse.yml +++ b/.github/workflows/opensuse.yml @@ -13,6 +13,7 @@ on: - '.github/workflows/cpack.yml' - '.github/workflows/containers.yml' - '.github/workflows/formatting.yml' + - '.github/workflows/weekly-mainline-sync.yml' - 'docker/**' pull_request: branches: [ amd-mainline, amd-staging, release/** ] @@ -25,6 +26,7 @@ on: - '.github/workflows/cpack.yml' - '.github/workflows/containers.yml' - '.github/workflows/formatting.yml' + - '.github/workflows/weekly-mainline-sync.yml' - 'docker/**' concurrency: diff --git a/.github/workflows/redhat.yml b/.github/workflows/redhat.yml index d48ab579..aee533bf 100644 --- a/.github/workflows/redhat.yml +++ b/.github/workflows/redhat.yml @@ -13,6 +13,7 @@ on: - '.github/workflows/cpack.yml' - '.github/workflows/containers.yml' - '.github/workflows/formatting.yml' + - '.github/workflows/weekly-mainline-sync.yml' - 'docker/**' pull_request: branches: [ amd-mainline, amd-staging, release/** ] @@ -25,6 +26,7 @@ on: - '.github/workflows/cpack.yml' - '.github/workflows/containers.yml' 
- '.github/workflows/formatting.yml' + - '.github/workflows/weekly-mainline-sync.yml' - 'docker/**' concurrency: diff --git a/.github/workflows/ubuntu-focal.yml b/.github/workflows/ubuntu-focal.yml index 93ed6182..67c72611 100644 --- a/.github/workflows/ubuntu-focal.yml +++ b/.github/workflows/ubuntu-focal.yml @@ -13,6 +13,7 @@ on: - '.github/workflows/cpack.yml' - '.github/workflows/containers.yml' - '.github/workflows/formatting.yml' + - '.github/workflows/weekly-mainline-sync.yml' - 'docker/**' pull_request: branches: [ amd-mainline, amd-staging, release/** ] @@ -25,6 +26,7 @@ on: - '.github/workflows/cpack.yml' - '.github/workflows/containers.yml' - '.github/workflows/formatting.yml' + - '.github/workflows/weekly-mainline-sync.yml' - 'docker/**' concurrency: diff --git a/.github/workflows/ubuntu-jammy.yml b/.github/workflows/ubuntu-jammy.yml index b69b0634..0bdc77ff 100644 --- a/.github/workflows/ubuntu-jammy.yml +++ b/.github/workflows/ubuntu-jammy.yml @@ -13,6 +13,7 @@ on: - '.github/workflows/cpack.yml' - '.github/workflows/containers.yml' - '.github/workflows/formatting.yml' + - '.github/workflows/weekly-mainline-sync.yml' - 'docker/**' pull_request: branches: [ amd-mainline, amd-staging, release/** ] @@ -25,6 +26,7 @@ on: - '.github/workflows/cpack.yml' - '.github/workflows/containers.yml' - '.github/workflows/formatting.yml' + - '.github/workflows/weekly-mainline-sync.yml' - 'docker/**' concurrency: diff --git a/.github/workflows/weekly-mainline-sync.yml b/.github/workflows/weekly-mainline-sync.yml new file mode 100644 index 00000000..37b4609a --- /dev/null +++ b/.github/workflows/weekly-mainline-sync.yml @@ -0,0 +1,26 @@ +name: Sync Mainline with Staging +on: + workflow_dispatch: + schedule: + - cron: 0 5 * * sun + +jobs: + promote-stg-to-main: + runs-on: ubuntu-latest + name: Promote Staging to Mainline + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: amd-mainline + fetch-depth: '0' + + - name: Merge - Fast Forward Only + env: + 
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + git checkout amd-mainline + git checkout -b promote-staging-$(date +%F) + git merge --ff-only origin/amd-staging + git push -u origin HEAD + gh pr create --base amd-mainline --title "Promote \`amd-staging\` to \`amd-mainline\`" --fill --label "automerge" From 915aa505608ff3f3acb5c48fe5ea057c45e3409c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Nov 2024 02:46:12 +0000 Subject: [PATCH 13/20] Bump rocm-docs-core[api_reference] from 1.8.3 to 1.8.4 in /docs/sphinx Bumps [rocm-docs-core[api_reference]](https://github.com/ROCm/rocm-docs-core) from 1.8.3 to 1.8.4. - [Release notes](https://github.com/ROCm/rocm-docs-core/releases) - [Changelog](https://github.com/ROCm/rocm-docs-core/blob/develop/CHANGELOG.md) - [Commits](https://github.com/ROCm/rocm-docs-core/compare/v1.8.3...v1.8.4) --- updated-dependencies: - dependency-name: rocm-docs-core[api_reference] dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- docs/sphinx/requirements.in | 2 +- docs/sphinx/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in index ad94caa9..00308cf3 100644 --- a/docs/sphinx/requirements.in +++ b/docs/sphinx/requirements.in @@ -1 +1 @@ -rocm-docs-core[api_reference]==1.8.3 +rocm-docs-core[api_reference]==1.8.4 diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt index 1b5eb1e0..00435444 100644 --- a/docs/sphinx/requirements.txt +++ b/docs/sphinx/requirements.txt @@ -92,7 +92,7 @@ requests==2.32.3 # via # pygithub # sphinx -rocm-docs-core[api-reference]==1.8.3 +rocm-docs-core[api-reference]==1.8.4 # via -r requirements.in smmap==5.0.1 # via gitdb From 043aa81baf4748acad5af453459b24a1b14d3151 Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Mon, 18 Nov 2024 10:46:29 -0500 Subject: [PATCH 14/20] Update weekly-mainline-sync.yml Restrict job to the ROCm/rocprofiler-systems repository. No need to run on forks. --- .github/workflows/weekly-mainline-sync.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/weekly-mainline-sync.yml b/.github/workflows/weekly-mainline-sync.yml index 37b4609a..b175c36d 100644 --- a/.github/workflows/weekly-mainline-sync.yml +++ b/.github/workflows/weekly-mainline-sync.yml @@ -6,6 +6,7 @@ on: jobs: promote-stg-to-main: + if: github.repository == 'ROCm/rocprofiler-systems' runs-on: ubuntu-latest name: Promote Staging to Mainline steps: From 3d17f0b90a1f142b6fcbbfca49c9dc7f83410bc0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 19 Nov 2024 02:15:02 +0000 Subject: [PATCH 15/20] Bump rocm-docs-core[api_reference] from 1.8.4 to 1.8.5 in /docs/sphinx Bumps [rocm-docs-core[api_reference]](https://github.com/ROCm/rocm-docs-core) from 1.8.4 to 1.8.5. 
- [Release notes](https://github.com/ROCm/rocm-docs-core/releases) - [Changelog](https://github.com/ROCm/rocm-docs-core/blob/v1.8.5/CHANGELOG.md) - [Commits](https://github.com/ROCm/rocm-docs-core/compare/v1.8.4...v1.8.5) --- updated-dependencies: - dependency-name: rocm-docs-core[api_reference] dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- docs/sphinx/requirements.in | 2 +- docs/sphinx/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in index 00308cf3..b0671ad4 100644 --- a/docs/sphinx/requirements.in +++ b/docs/sphinx/requirements.in @@ -1 +1 @@ -rocm-docs-core[api_reference]==1.8.4 +rocm-docs-core[api_reference]==1.8.5 diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt index 00435444..75f2b291 100644 --- a/docs/sphinx/requirements.txt +++ b/docs/sphinx/requirements.txt @@ -92,7 +92,7 @@ requests==2.32.3 # via # pygithub # sphinx -rocm-docs-core[api-reference]==1.8.4 +rocm-docs-core[api-reference]==1.8.5 # via -r requirements.in smmap==5.0.1 # via gitdb From 743c0fcc093f2ab8c8770c7ce059eddd50e0e91b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 02:40:55 +0000 Subject: [PATCH 16/20] Bump rocm-docs-core[api_reference] from 1.8.5 to 1.9.0 in /docs/sphinx Bumps [rocm-docs-core[api_reference]](https://github.com/ROCm/rocm-docs-core) from 1.8.5 to 1.9.0. - [Release notes](https://github.com/ROCm/rocm-docs-core/releases) - [Changelog](https://github.com/ROCm/rocm-docs-core/blob/v1.9.0/CHANGELOG.md) - [Commits](https://github.com/ROCm/rocm-docs-core/compare/v1.8.5...v1.9.0) --- updated-dependencies: - dependency-name: rocm-docs-core[api_reference] dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- docs/sphinx/requirements.in | 2 +- docs/sphinx/requirements.txt | 48 ++++++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in index b0671ad4..92f9d922 100644 --- a/docs/sphinx/requirements.in +++ b/docs/sphinx/requirements.in @@ -1 +1 @@ -rocm-docs-core[api_reference]==1.8.5 +rocm-docs-core[api_reference]==1.9.0 diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt index 75f2b291..11ccc306 100644 --- a/docs/sphinx/requirements.txt +++ b/docs/sphinx/requirements.txt @@ -25,9 +25,18 @@ cffi==1.17.1 charset-normalizer==3.3.2 # via requests click==8.1.7 - # via sphinx-external-toc + # via + # click-log + # doxysphinx + # sphinx-external-toc +click-log==0.4.0 + # via doxysphinx +contourpy==1.3.1 + # via matplotlib cryptography==43.0.1 # via pyjwt +cycler==0.12.1 + # via matplotlib deprecated==1.2.14 # via pygithub docutils==0.21.2 @@ -36,8 +45,12 @@ docutils==0.21.2 # myst-parser # pydata-sphinx-theme # sphinx +doxysphinx==3.3.12 + # via rocm-docs-core fastjsonschema==2.20.0 # via rocm-docs-core +fonttools==4.55.0 + # via matplotlib gitdb==4.0.11 # via gitpython gitpython==3.1.43 @@ -50,22 +63,40 @@ jinja2==3.1.4 # via # myst-parser # sphinx +kiwisolver==1.4.7 + # via matplotlib +libsass==0.22.0 + # via doxysphinx +lxml==5.2.1 + # via doxysphinx markdown-it-py==3.0.0 # via # mdit-py-plugins # myst-parser markupsafe==2.1.5 # via jinja2 +matplotlib==3.9.2 + # via doxysphinx mdit-py-plugins==0.4.2 # via myst-parser mdurl==0.1.2 # via markdown-it-py +mpire==2.10.2 + # via doxysphinx myst-parser==4.0.0 # via rocm-docs-core +numpy==1.26.4 + # via + # contourpy + # doxysphinx + # matplotlib packaging==24.1 # via + # matplotlib # pydata-sphinx-theme # sphinx +pillow==11.0.0 + # via matplotlib pycparser==2.22 # via cffi pydata-sphinx-theme==0.15.4 @@ -77,12 +108,21 @@ pygithub==2.4.0 pygments==2.18.0 # via # 
accessible-pygments + # mpire # pydata-sphinx-theme # sphinx +pyjson5==1.6.7 + # via doxysphinx pyjwt[crypto]==2.9.0 # via pygithub pynacl==1.5.0 # via pygithub +pyparsing==3.2.0 + # via + # doxysphinx + # matplotlib +python-dateutil==2.9.0.post0 + # via matplotlib pyyaml==6.0.2 # via # myst-parser @@ -92,8 +132,10 @@ requests==2.32.3 # via # pygithub # sphinx -rocm-docs-core[api-reference]==1.8.5 +rocm-docs-core[api-reference]==1.9.0 # via -r requirements.in +six==1.16.0 + # via python-dateutil smmap==5.0.1 # via gitdb snowballstemmer==2.2.0 @@ -135,6 +177,8 @@ sphinxcontrib-serializinghtml==2.0.0 # via sphinx tomli==2.0.2 # via sphinx +tqdm==4.67.1 + # via mpire typing-extensions==4.12.2 # via # pydata-sphinx-theme From eb1af1217793bbac4d3d3f8378eb760a23a59bd4 Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Mon, 25 Nov 2024 16:02:53 -0500 Subject: [PATCH 17/20] Fix packaging for upgrading Specify that "rocprofiler-systems" replaces / obsoletes the "omnitrace" package in the CPACK configuration. Signed-off-by: David Galiffi --- cmake/ConfigCPack.cmake | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/cmake/ConfigCPack.cmake b/cmake/ConfigCPack.cmake index 14ed2fcc..ce5fa731 100644 --- a/cmake/ConfigCPack.cmake +++ b/cmake/ConfigCPack.cmake @@ -45,6 +45,10 @@ set(CPACK_PACKAGE_VERSION_PATCH "${PROJECT_VERSION_PATCH}") set(CPACK_PACKAGE_CONTACT "https://github.com/ROCm/rocprofiler-systems") set(CPACK_RESOURCE_FILE_LICENSE "${PROJECT_SOURCE_DIR}/LICENSE") set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF) + +# For handling the project rebranding from "omnitrace" to "rocprofiler-systems" +set(OMNITRACE_PACKAGE_NAME "omnitrace") + set(ROCPROFSYS_CPACK_SYSTEM_NAME "${_SYSTEM_NAME}" CACHE STRING "System name, e.g. 
Linux or Ubuntu-20.04") @@ -184,6 +188,11 @@ set(CPACK_DEBIAN_PACKAGE_DEPENDS set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT") set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON) +# Handle the project rebranding from "omnitrace" to "rocprofiler-systems" +set(CPACK_DEBIAN_PACKAGE_PROVIDES ${OMNITRACE_PACKAGE_NAME}) +set(CPACK_DEBIAN_PACKAGE_REPLACES ${OMNITRACE_PACKAGE_NAME}) +set(CPACK_DEBIAN_PACKAGE_BREAKS ${OMNITRACE_PACKAGE_NAME}) + # -------------------------------------------------------------------------------------- # # # RPM package specific variables @@ -200,7 +209,10 @@ string(REGEX REPLACE "([a-zA-Z])-([0-9])" "\\1\\2" CPACK_RPM_PACKAGE_RELEASE "${CPACK_RPM_PACKAGE_RELEASE}") string(REPLACE "-" "~" CPACK_RPM_PACKAGE_RELEASE "${CPACK_RPM_PACKAGE_RELEASE}") -set(_RPM_PACKAGE_PROVIDES "") +# Handle the project rebranding from "omnitrace" to "rocprofiler-systems" +set(CPACK_RPM_PACKAGE_OBSOLETES ${OMNITRACE_PACKAGE_NAME}) +set(CPACK_RPM_PACKAGE_CONFLICTS ${OMNITRACE_PACKAGE_NAME}) +set(_RPM_PACKAGE_PROVIDES ${OMNITRACE_PACKAGE_NAME}) if(ROCPROFSYS_BUILD_LIBUNWIND) list(APPEND _RPM_PACKAGE_PROVIDES "libunwind.so.99()(64bit)") From ffcc0b6f3db0cfee7476ac990d57105d3811dc78 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 28 Nov 2024 03:03:32 +0000 Subject: [PATCH 18/20] Bump rocm-docs-core[api_reference] from 1.9.0 to 1.9.1 in /docs/sphinx Bumps [rocm-docs-core[api_reference]](https://github.com/ROCm/rocm-docs-core) from 1.9.0 to 1.9.1. - [Release notes](https://github.com/ROCm/rocm-docs-core/releases) - [Changelog](https://github.com/ROCm/rocm-docs-core/blob/develop/CHANGELOG.md) - [Commits](https://github.com/ROCm/rocm-docs-core/compare/v1.9.0...v1.9.1) --- updated-dependencies: - dependency-name: rocm-docs-core[api_reference] dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- docs/sphinx/requirements.in | 2 +- docs/sphinx/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in index 92f9d922..d0fdc5c1 100644 --- a/docs/sphinx/requirements.in +++ b/docs/sphinx/requirements.in @@ -1 +1 @@ -rocm-docs-core[api_reference]==1.9.0 +rocm-docs-core[api_reference]==1.9.1 diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt index 11ccc306..0e82c3e3 100644 --- a/docs/sphinx/requirements.txt +++ b/docs/sphinx/requirements.txt @@ -132,7 +132,7 @@ requests==2.32.3 # via # pygithub # sphinx -rocm-docs-core[api-reference]==1.9.0 +rocm-docs-core[api-reference]==1.9.1 # via -r requirements.in six==1.16.0 # via python-dateutil From 7580eb950b522022d9b0c017d32aaba48afa3c14 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Dec 2024 02:59:19 +0000 Subject: [PATCH 19/20] Bump rocm-docs-core[api_reference] from 1.9.1 to 1.10.0 in /docs/sphinx Bumps [rocm-docs-core[api_reference]](https://github.com/ROCm/rocm-docs-core) from 1.9.1 to 1.10.0. - [Release notes](https://github.com/ROCm/rocm-docs-core/releases) - [Changelog](https://github.com/ROCm/rocm-docs-core/blob/develop/CHANGELOG.md) - [Commits](https://github.com/ROCm/rocm-docs-core/compare/v1.9.1...v1.10.0) --- updated-dependencies: - dependency-name: rocm-docs-core[api_reference] dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- docs/sphinx/requirements.in | 2 +- docs/sphinx/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in index d0fdc5c1..09799066 100644 --- a/docs/sphinx/requirements.in +++ b/docs/sphinx/requirements.in @@ -1 +1 @@ -rocm-docs-core[api_reference]==1.9.1 +rocm-docs-core[api_reference]==1.10.0 diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt index 0e82c3e3..ba69857a 100644 --- a/docs/sphinx/requirements.txt +++ b/docs/sphinx/requirements.txt @@ -132,7 +132,7 @@ requests==2.32.3 # via # pygithub # sphinx -rocm-docs-core[api-reference]==1.9.1 +rocm-docs-core[api-reference]==1.10.0 # via -r requirements.in six==1.16.0 # via python-dateutil From 8f0282bb691fd1286fdc90ab3e6823ad86415f86 Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Tue, 3 Dec 2024 12:38:35 -0500 Subject: [PATCH 20/20] Add test for ompt-target --- tests/rocprof-sys-openmp-tests.cmake | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/rocprof-sys-openmp-tests.cmake b/tests/rocprof-sys-openmp-tests.cmake index 94d072c6..91a5cb9d 100644 --- a/tests/rocprof-sys-openmp-tests.cmake +++ b/tests/rocprof-sys-openmp-tests.cmake @@ -38,6 +38,16 @@ rocprofiler_systems_add_test( REWRITE_RUN_PASS_REGEX "${_OMPT_PASS_REGEX}" REWRITE_FAIL_REGEX "0 instrumented loops in procedure") +rocprofiler_systems_add_test( + SKIP_RUNTIME SKIP_REWRITE + NAME openmp-target + TARGET openmp-target + GPU ON + LABELS "openmp;openmp-target" + ENVIRONMENT + "${_ompt_environment};ROCPROFSYS_ROCTRACER_HSA_ACTIVITY=OFF;ROCPROFSYS_ROCTRACER_HSA_API=OFF" + ) + set(_ompt_sampling_environ "${_ompt_environment}" "ROCPROFSYS_VERBOSE=2"