Skip to content

Commit

Permalink
miscellaneous fixes
Browse files Browse the repository at this point in the history
- use pthread instread of omp in multi_monitor.cpp test
- fix path for hipcc
- use papi@master as default version of papi
- print the test name in run script
  • Loading branch information
gcongiu committed Aug 16, 2023
1 parent d151772 commit 3c71e6e
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 113 deletions.
8 changes: 4 additions & 4 deletions validation_tests/papi-rocm/Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
PAPI_ROCM_ROOT ?= $(ROCM_ROOT) #/path/to/rocm/

CC = $(PAPI_ROCM_ROOT)/bin/hipcc
CXX = $(PAPI_ROCM_ROOT)/bin/hipcc
CXXFLAGS += -g -O2 -fopenmp
CC = $(HIP_ROOT)/bin/hipcc
CXX = $(HIP_ROOT)/bin/hipcc
CXXFLAGS += -g -O2
CPPFLAGS += -I$(PAPI_ROCM_ROOT)/include -I$(PAPI_ROOT)/include
LDFLAGS += -L$(PAPI_ROOT)/lib -lpapi -fopenmp
LDFLAGS += -L$(PAPI_ROOT)/lib -lpapi

ALL: single_monitor multi_monitor overflow

Expand Down
2 changes: 1 addition & 1 deletion validation_tests/papi-rocm/compile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
set -e
set -x

make PAPIROOT=$PAPI_ROOT
make PAPIROOT=$PAPI_ROOT
230 changes: 130 additions & 100 deletions validation_tests/papi-rocm/multi_monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,119 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <omp.h>
#include <pthread.h>
#include "papi.h"
#include "matmul.h"

#define NUM_EVENTS 2
const char *events[NUM_EVENTS] = {
"rocm:::SQ_WAVES",
"rocm:::SQ_WAVES_RESTORED",
};

typedef struct {
int num_thread;
} thread_arg_t;

void *run(void *arg)
{
int eventset = PAPI_NULL;
int papi_errno = PAPI_create_eventset(&eventset);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_create_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}

int thread_num = ((thread_arg_t *) arg)->num_thread;
for (int j = 0; j < NUM_EVENTS; ++j) {
char named_event[PAPI_MAX_STR_LEN] = { 0 };
sprintf(named_event, "%s:device=%d", events[j], thread_num);
papi_errno = PAPI_add_named_event(eventset, (const char *) named_event);
if (papi_errno != PAPI_OK && papi_errno != PAPI_ENOEVNT) {
fprintf(stderr, "ERROR: PAPI_add_named_event: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}
}

papi_errno = PAPI_start(eventset);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_start: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}

hipError_t hip_errno = hipSetDevice(thread_num);
if (hip_errno != hipSuccess) {
fprintf(stderr, "ERROR: hipSetDevice: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

hipStream_t stream;
hip_errno = hipStreamCreate(&stream);
if (hip_errno != hipSuccess) {
fprintf(stderr, "ERROR: hipStreamCreate: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

void *handle;
int matmul_errno;
matmul_errno = matmul_init(&handle);
if (matmul_errno != MATMUL_SUCCESS) {
fprintf(stderr, "ERROR: matmul_init: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

matmul_errno = matmul_run(handle, stream);
if (matmul_errno != MATMUL_SUCCESS) {
fprintf(stderr, "ERROR: matmul_run: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

hip_errno = hipStreamSynchronize(stream);
if (hip_errno != hipSuccess) {
fprintf(stderr, "ERROR: hipStreamSynchronize: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

hip_errno = hipStreamDestroy(stream);
if (hip_errno != hipSuccess) {
fprintf(stderr, "ERROR: hipStreamDestroy: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

matmul_errno = matmul_finalize(&handle);
if (matmul_errno != MATMUL_SUCCESS) {
fprintf(stderr, "ERROR: matmul_finalize: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

long long counters[NUM_EVENTS] = { 0 };
papi_errno = PAPI_stop(eventset, counters);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_stop: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}

for (int i = 0; i < NUM_EVENTS; ++i) {
fprintf(stdout, "[tid:%d] %s:device=%d : %lld\n",
thread_num, events[i], thread_num,
counters[i]);
}

papi_errno = PAPI_cleanup_eventset(eventset);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_cleanup_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}

papi_errno = PAPI_destroy_eventset(&eventset);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_destroy_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}

pthread_exit(NULL);
}

int main(int argc, char *argv[])
{
int papi_errno;
Expand All @@ -30,7 +139,7 @@ int main(int argc, char *argv[])
exit(EXIT_FAILURE);
}

papi_errno = PAPI_thread_init((unsigned long (*)(void)) omp_get_thread_num);
papi_errno = PAPI_thread_init((unsigned long (*)(void)) pthread_self);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_thread_init: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
Expand All @@ -44,112 +153,33 @@ int main(int argc, char *argv[])
}

num_threads = (num_threads < num_devices) ? num_threads : num_devices;
omp_set_num_threads(num_threads);
fprintf(stdout, "Run rocm test with %d threads\n", num_threads);

#define NUM_EVENTS 2
const char *events[NUM_EVENTS] = {
"rocm:::SQ_WAVES",
"rocm:::SQ_WAVES_RESTORED",
};

#pragma omp parallel
{
int eventset = PAPI_NULL;
papi_errno = PAPI_create_eventset(&eventset);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_create_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}

int thread_num = omp_get_thread_num();
for (int j = 0; j < NUM_EVENTS; ++j) {
char named_event[PAPI_MAX_STR_LEN] = { 0 };
sprintf(named_event, "%s:device=%d", events[j], thread_num);
papi_errno = PAPI_add_named_event(eventset, (const char *) named_event);
if (papi_errno != PAPI_OK && papi_errno != PAPI_ENOEVNT) {
fprintf(stderr, "ERROR: PAPI_add_named_event: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}
}

papi_errno = PAPI_start(eventset);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_start: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}

hip_errno = hipSetDevice(thread_num);
if (hip_errno != hipSuccess) {
fprintf(stderr, "ERROR: hipSetDevice: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

hipStream_t stream;
hip_errno = hipStreamCreate(&stream);
if (hip_errno != hipSuccess) {
fprintf(stderr, "ERROR: hipStreamCreate: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

void *handle;
int matmul_errno;
matmul_errno = matmul_init(&handle);
if (matmul_errno != MATMUL_SUCCESS) {
fprintf(stderr, "ERROR: matmul_init: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

matmul_errno = matmul_run(handle, stream);
if (matmul_errno != MATMUL_SUCCESS) {
fprintf(stderr, "ERROR: matmul_run: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

hip_errno = hipStreamSynchronize(stream);
if (hip_errno != hipSuccess) {
fprintf(stderr, "ERROR: hipStreamSynchronize: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

hip_errno = hipStreamDestroy(stream);
if (hip_errno != hipSuccess) {
fprintf(stderr, "ERROR: hipStreamDestroy: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

matmul_errno = matmul_finalize(&handle);
if (matmul_errno != MATMUL_SUCCESS) {
fprintf(stderr, "ERROR: matmul_finalize: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}
pthread_t *thread = (pthread_t *)malloc(num_threads * sizeof(*thread));
if (thread == NULL) {
return EXIT_FAILURE;
}

long long counters[NUM_EVENTS] = { 0 };
papi_errno = PAPI_stop(eventset, counters);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_stop: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}
thread_arg_t *arg = (thread_arg_t *)malloc(num_threads * sizeof(*arg));
if (arg == NULL) {
return EXIT_FAILURE;
}

for (int i = 0; i < NUM_EVENTS; ++i) {
fprintf(stdout, "[tid:%d] %s:device=%d : %lld\n",
omp_get_thread_num(), events[i], thread_num,
counters[i]);
}
pthread_attr_t attr;
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

papi_errno = PAPI_cleanup_eventset(eventset);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_cleanup_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}
for (int i = 0; i < num_threads; ++i) {
arg[i].num_thread = i;
pthread_create(&thread[i], &attr, run, &arg[i]);
}

papi_errno = PAPI_destroy_eventset(&eventset);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_destroy_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}
for (int i = 0; i < num_threads; ++i) {
pthread_join(thread[i], NULL);
}

free(thread);
free(arg);
PAPI_shutdown();

return EXIT_SUCCESS;
Expand Down
14 changes: 7 additions & 7 deletions validation_tests/papi-rocm/run_rocm_tests.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#!/bin/bash

echo "=== Run rocm test in sampling mode"
ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./single_monitor
ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./multi_monitor --threads=1
ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./multi_monitor --threads=2
ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./overflow
ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$LD_LIBRARY_PATH echo "single_monitor" && ./single_monitor
ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$LD_LIBRARY_PATH echo "multi_monitor" && ./multi_monitor --threads=1
ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$LD_LIBRARY_PATH echo "multi_monitor" &&./multi_monitor --threads=2
ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$LD_LIBRARY_PATH echo "overflow" && ./overflow

echo "=== Run rocm test in intercept mode"
ROCP_HSA_INTERCEPT=1 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./single_monitor
ROCP_HSA_INTERCEPT=1 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./multi_monitor --threads=1
ROCP_HSA_INTERCEPT=1 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./multi_monitor --threads=2
ROCP_HSA_INTERCEPT=1 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$LD_LIBRARY_PATH echo "single_monitor" && ./single_monitor
ROCP_HSA_INTERCEPT=1 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$LD_LIBRARY_PATH echo "multi_monitor" && ./multi_monitor --threads=1
ROCP_HSA_INTERCEPT=1 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$LD_LIBRARY_PATH echo "multi_monitor" && ./multi_monitor --threads=2
2 changes: 1 addition & 1 deletion validation_tests/papi-rocm/setup.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/bin/bash
. ../../setup.sh
spackLoadUnique papi+rocm@6.0.0.2:
spackLoadUnique papi+rocm@master:

0 comments on commit 3c71e6e

Please sign in to comment.