Skip to content

Commit

Permalink
Merge branch 'amd-develop' into amd-master
Browse files Browse the repository at this point in the history
Change-Id: I32d41081ac065f2c50531dc2e420802d765665e2
  • Loading branch information
mangupta committed Nov 14, 2016
2 parents b935a87 + 09b157c commit 8d40253
Show file tree
Hide file tree
Showing 41 changed files with 1,957 additions and 655 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
.*
!.gitignore
*.o
*.exe
*.swp
Expand Down
5 changes: 1 addition & 4 deletions .vimrc
Original file line number Diff line number Diff line change
@@ -1,4 +1 @@
:set tabstop=4
:set shiftwidth=4
:set expandtab
:set smartindent
:set makeprg=make\ -C\ build.hcc-LC.db
5 changes: 3 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ if(NOT DEFINED COMPILE_HIP_ATP_MARKER)
endif()
add_to_config(_buildInfo COMPILE_HIP_ATP_MARKER)


#############################
# Build steps
#############################
Expand All @@ -164,7 +165,7 @@ if(HIP_PLATFORM STREQUAL "hcc")
set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -DHIP_VERSION_MAJOR=${HIP_VERSION_MAJOR} -DHIP_VERSION_MINOR=${HIP_VERSION_MINOR} -DHIP_VERSION_PATCH=${HIP_VERSION_PATCH}")

# Add remaining flags
set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -fPIC -hc -I${HCC_HOME}/include -I${HSA_PATH}/include -I/opt/rocm/libhsakmt/include -stdlib=libc++")
set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -fPIC -hc -I${HCC_HOME}/include -I${HSA_PATH}/include -I/opt/rocm/libhsakmt/include -I/usr/local/include/c++/v1 -stdlib=libc++")

# Set compiler and compiler flags
set(CMAKE_CXX_COMPILER "${HCC_HOME}/bin/hcc")
Expand Down Expand Up @@ -337,7 +338,7 @@ endif()
add_custom_target(install_for_test COMMAND "${CMAKE_COMMAND}" --build . --target install
WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
execute_process(COMMAND getconf _NPROCESSORS_ONLN OUTPUT_VARIABLE DASH_JAY OUTPUT_STRIP_TRAILING_WHITESPACE)
add_custom_target(test COMMAND ${CMAKE_COMMAND} .
add_custom_target(test COMMAND ${CMAKE_COMMAND} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} .
COMMAND make -j ${DASH_JAY}
COMMAND make test
WORKING_DIRECTORY ${BUILD_DIR}
Expand Down
5 changes: 5 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ Differences or limitations of HIP APIs as compared to CUDA APIs should be clearl
- ihipLogStatus should only be called from top-level HIP APIs,and should be called to log and return the error code. The error code
is used by the GetLastError and PeekLastError functions - if a HIP API simply returns, then the error will not be logged correctly.

- All HIP environment variables should begin with the keyword HIP_
Environment variables should be long enough to describe their purpose but short enough so they can be remembered - perhaps 10-20 characters, with 3-4 parts separated by underscores.
To see the list of current environment variables, along with their values, set HIP_PRINT_ENV and run any hip applications on ROCM platform .
HIPCC or other tools may support additional environment variables which should follow the above convention.



#### Presubmit Testing:
Expand Down
17 changes: 17 additions & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,25 @@ Upcoming:

## Revision History:

===================================================================================================
Release:1.0
Date: 2016.11.8
- Initial implementation for FindHIP.cmake
- HIP library now installs as a static library by default
- Added support for HIP context and HIP module APIs
- Major changes to HIP signal & memory management implementation
- Support for complex data type and math functions
- clang-hipify is now known as hipify-clang
- Added several new HIP samples
- Preliminary support for new APIs: hipMemcpyToSymbol, hipDeviceGetLimit, hipRuntimeGetVersion
- Added support for async memcpy driver API (for example hipMemcpyHtoDAsync)
- Support for memory management device functions: malloc, free, memcpy & memset
- Removed deprecated HIP runtime header locations. Please include "hip/hip_runtime.h" instead of "hip_runtime.h". You can use `find . -type f -exec sed -i 's:#include "hip_runtime.h":#include "hip/hip_runtime.h":g' {} +` to replace all such references


===================================================================================================
Release:0.92.00
Date: 2016.8.14
- hipLaunchKernel supports one-dimensional grid and/or block dims, without explicit cast to dim3 type (actually in 0.90.00)
- fp16 software support
- Support for Hawaii dGPUs using environment variable ROCM_TARGET=hawaii
Expand Down
29 changes: 18 additions & 11 deletions bin/hipcc
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ if ($HIP_PLATFORM eq "hcc") {

$ROCM_PATH=$ENV{'ROCM_PATH'} // "/opt/rocm";

$HIP_ATP_MARKER=$ENV{'HIP_ATP_MARKER'};
$HIP_ATP_MARKER=$ENV{'HIP_ATP_MARKER'} // 1;
$marker_path = "$ROCM_PATH/profiler/CXLActivityLogger";

$ROCM_TARGET=$ENV{'ROCM_TARGET'} // "fiji";
Expand Down Expand Up @@ -116,9 +116,16 @@ if ($HIP_PLATFORM eq "hcc") {
} else {
$HIPLDFLAGS .= " -Wl,--defsym=_binary_kernel_spir_end=1 -Wl,--defsym=_binary_kernel_spir_start=1 -Wl,--defsym=_binary_kernel_cl_start=1 -Wl,--defsym=_binary_kernel_cl_end=1";
}
if ($HOST_OSNAME eq "fedora") {
$HIPCXXFLAGS .= " -I/usr/local/include/c++/v1";
}

# Satisfy HCC dependencies
$HIPLDFLAGS .= " -lc++abi -lsupc++";
if ($HOST_OSNAME eq "fedora") {
$HIPLDFLAGS .= " -lc++abi";
} else {
$HIPLDFLAGS .= " -lc++abi -lsupc++";
}
$HIPLDFLAGS .= " -L$HSA_PATH/lib -L$ROCM_PATH/lib -lhsa-runtime64 -lhc_am -lhsakmt";

# Handle ROCm target platform
Expand Down Expand Up @@ -273,10 +280,9 @@ foreach $arg (@ARGV)
# Process HIPCC options here:
if ($arg =~ m/^--hipcc/) {
$swallowArg = 1;
if ($arg eq "--hipcc_explicit_lib") {
# Some environments (ie cmake tests) already link their own hip_hcc.o, so don't add here:
$needHipHcc = 0;
}
#if $arg eq "--hipcc_profile") { # Example argument here, hipcc
#
#}
} else {
push (@options, $arg);
}
Expand Down Expand Up @@ -314,14 +320,15 @@ if ($setStdLib eq 0 and $HIP_PLATFORM eq 'hcc')
}

if ($needHipHcc) {
$HIP_LIB_TYPE = $hipConfig{'HIP_LIB_TYPE'} // 0;
$HIP_LIB_TYPE = $hipConfig{'HIP_LIB_TYPE'} // 1;

# TODO - remove the old sea-of-objects solution:
if ($HIP_LIB_TYPE eq 0) {
$HIPLDFLAGS .= " $HIP_PATH/lib/device_util.cpp.o $HIP_PATH/lib/hip_device.cpp.o $HIP_PATH/lib/hip_error.cpp.o $HIP_PATH/lib/hip_event.cpp.o $HIP_PATH/lib/hip_hcc.cpp.o $HIP_PATH/lib/hip_memory.cpp.o $HIP_PATH/lib/hip_peer.cpp.o $HIP_PATH/lib/hip_stream.cpp.o $HIP_PATH/lib/hip_ldg.cpp.o $HIP_PATH/lib/hip_fp16.cpp.o $HIP_PATH/lib/hip_context.cpp.o $HIP_PATH/lib/hip_module.cpp.o";
substr($HIPLDFLAGS,0,0) = " $HIP_PATH/lib/device_util.cpp.o $HIP_PATH/lib/hip_device.cpp.o $HIP_PATH/lib/hip_error.cpp.o $HIP_PATH/lib/hip_event.cpp.o $HIP_PATH/lib/hip_hcc.cpp.o $HIP_PATH/lib/hip_memory.cpp.o $HIP_PATH/lib/hip_peer.cpp.o $HIP_PATH/lib/hip_stream.cpp.o $HIP_PATH/lib/hip_ldg.cpp.o $HIP_PATH/lib/hip_fp16.cpp.o $HIP_PATH/lib/hip_context.cpp.o $HIP_PATH/lib/hip_module.cpp.o ";
} elsif ($HIP_LIB_TYPE eq 1) {
$HIPLDFLAGS .= " -L$HIP_PATH/lib -lhip_hcc" ;
substr($HIPLDFLAGS,0,0) = " -L$HIP_PATH/lib -lhip_hcc " ;
} else {
$HIPLDFLAGS .= " -L$HIP_PATH/lib -Wl,--rpath=$HIP_PATH/lib -lhip_hcc";
substr($HIPLDFLAGS,0,0) = " -L$HIP_PATH/lib -Wl,--rpath=$HIP_PATH/lib -lhip_hcc ";
}
}

Expand Down Expand Up @@ -353,7 +360,7 @@ if ($printHipVersion) {
}
if ($runCmd) {
if ($HIP_PLATFORM eq "hcc" and exists($hipConfig{'HCC_VERSION'}) and $HCC_VERSION ne $hipConfig{'HCC_VERSION'}) {
print ("HIP ($HIP_PATH) was built using hcc $hipConfig{'HCC_VERSION'}, but you are using hcc $HCC_VERSION. Please rebuild HIP including cmake.\n") && die ();
print ("HIP ($HIP_PATH) was built using hcc $hipConfig{'HCC_VERSION'}, but you are using $HCC_HOME/hcc with version $HCC_VERSION from hipcc. Please rebuild HIP including cmake or update HCC_HOME variable.\n") && die ();
}
system ("$CMD") and die ();
}
19 changes: 19 additions & 0 deletions bin/hipdemangleatp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash

# usage: hipdemangleatp.sh ATP_FILE

# HIP kernels
kernels=$(grep grid_launch_parm $1 | cut -d" " -f1 | sort | uniq)
for mangled_sym in $kernels; do
real_sym=$(c++filt -p $(c++filt _$mangled_sym | cut -d: -f3 | sed 's/_functor//g' | sed 's/ /\\\&nbsp/g'))
#echo "$mangled_sym => $real_sym" >> $1.log
sed -i "s/$mangled_sym/$real_sym/g" $1
done

# HC kernels
kernels=$(grep cxxamp_trampoline $1 | cut -d" " -f1 | sort | uniq)
for mangled_sym in $kernels; do
real_sym=$(echo $mangled_sym | sed "s/^/_/g; s/_EC_/_$/g" | c++filt -p | cut -d\( -f1 | cut -d" " -f1 --complement | sed 's/ /\\\&nbsp/g')
#echo "$mangled_sym => $real_sym" >> $1.log
sed -i "s/$mangled_sym/$real_sym/g" $1
done
38 changes: 1 addition & 37 deletions docs/markdown/hip_faq.md
Original file line number Diff line number Diff line change
Expand Up @@ -229,43 +229,7 @@ If platform portability is important, use #ifdef __HIP_PLATFORM_HIPCC__ to guard


### How do I trace HIP application flow?
#### Using CodeXL markers for HIP Functions
HIP can generate markers at function being/end which are displayed on the CodeXL timeline view.
To do this, you need to install ROCm-Profiler and enable HIP to generate the markers:

1. Install ROCm-Profiler
Installing HIP from the [rocm](http://gpuopen.com/getting-started-with-boltzmann-components-platforms-installation/) pre-built packages, installs the ROCm-Profiler as well.
Alternatively, you can build ROCm-Profiler using the instructions [here](https://github.com/RadeonOpenCompute/ROCm-Profiler#building-the-rocm-profiler).

2. Build HIP with ATP markers enabled
HIP pre-built packages are enabled with ATP marker support by default.
To enable ATP marker support when building HIP from source, use the option ```-DCOMPILE_HIP_ATP_MARKER=1``` during the cmake configure step.

3. Set HIP_ATP_MARKER
```shell
export HIP_ATP_MARKER=1
```

4. Recompile the target application

5. Run with profiler enabled to generate ATP file.
```shell
# Use profile to generate timeline view:
/opt/rocm/bin/rocm-profiler -o <outputATPFileName> -A <applicationName> <applicationArguments>

Or
/opt/rocm/bin/rocm-profiler -e HIP_ATP_MARKER=1 -o <outputATPFileName> -A <applicationName> <applicationArguments>
```

#### Using HIP_TRACE_API
You can also print the HIP function strings to stderr using HIP_TRACE_API environment variable. This can also be combined with the more detailed debug information provided
by the HIP_DB switch. For example:
```shell
# Trace to stderr showing being/end of each function (with arguments) + intermediate debug trace during the execution of each function.
HIP_TRACE_API=1 HIP_DB=0x2 ./myHipApp
```

Note this trace mode uses colors. "less -r" can handle raw control characters and will display the debug output in proper colors.
See the [HIP Profiling Guide](hip_porting_guide.md) for more information.

### What if HIP generates error of "symbol multiply defined!" only on AMD machine?
Unlike CUDA, in HCC, for functions defined in the header files, the keyword of "__forceinline__" does not imply "static".
Expand Down
7 changes: 5 additions & 2 deletions docs/markdown/hip_kernel_language.md
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,11 @@ typedef struct dim3 {
## Memory-Fence Instructions
HIP supports __threadfence() and __threadfence_block().

Applications that use threadfence_system can disable the L1 and L2 caches on the GPU by:
"export HSA_DISABLE_CACHE=1". See the hip_porting_guide.md#threadfence_system for more information.
HIP provides workaround for threadfence_system() under HCC path.
To enable the workaround, HIP should be built with environment variable HIP_COHERENT_HOST_ALLOC enabled.
In addition,the kernels that use __threadfence_system() should be modified as follows:
- The kernel should only operate on finegrained system memory; which should be allocated with hipHostMalloc().
- Remove all memcpy for those allocated finegrained system memory regions.

## Synchronization Functions
The __syncthreads() built-in function is supported in HIP. The __syncthreads_count(int), __syncthreads_and(int) and __syncthreads_or(int) functions are under development.
Expand Down
2 changes: 1 addition & 1 deletion docs/markdown/hip_porting_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,7 @@ HIP_LAUNCH_BLOCKING = 0 : Make HIP APIs 'host-synchronous', so they
HIP_DB = 0 : Print various debug info. Bitmask, see hip_hcc.cpp for more information.
HIP_TRACE_API = 0 : Trace each HIP API call. Print function name and return code to stderr as program executes.
HIP_TRACE_API_COLOR = green : Color to use for HIP_API. None/Red/Green/Yellow/Blue/Magenta/Cyan/White
HIP_ATP_MARKER = 0 : Add HIP function begin/end to ATP file generated with CodeXL
HIP_PROFILE_API = 0 : Add HIP function begin/end to ATP file generated with CodeXL
HIP_VISIBLE_DEVICES = 0 : Only devices whose index is present in the secquence are visible to HIP applications and they are enumerated in the order of secquence
HIP_NUM_KERNELS_INFLIGHT = 128 : Number of kernels per stream
Expand Down
95 changes: 95 additions & 0 deletions docs/markdown/hip_profiling.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Profiling HIP Code

HIP provides several capabilities to support debugging and profiling. Profiling information can be displayed to stderr or viewed in the CodeXl visualization tool.

### Usign CodeXL to profile a HIP Application
By defauly, CodeXL can trace all kernel commands, data transfer commands, and HSA Runtime (ROCr) API calls.
/opt/rocm/bin/rocm-profiler -o <outputATPFileName> -A <applicationName> <applicationArguments>

### Using CodeXL markers for HIP Functions
HIP can generate markers at function being/end which are displayed on the CodeXL timeline view.
HIP 1.0 compiles marker support by default, and you can enable it by setting the HIP_PROFILE_API environment variable and then running the rocm-profiler:

```shell

# Use profile to generate timeline view:
export HIP_PROFILE_API=1
/opt/rocm/bin/rocm-profiler -o <outputATPFileName> -A <applicationName> <applicationArguments>

Or
/opt/rocm/bin/rocm-profiler -e HIP_PROFILE_API=1 -o <outputATPFileName> -A <applicationName> <applicationArguments>
```

#### Developer Builds
For developer builds, you must enable marker support manually when compiling HIP.

1. Build HIP with ATP markers enabled
HIP pre-built packages are enabled with ATP marker support by default.
To enable ATP marker support when building HIP from source, use the option ```-DCOMPILE_HIP_ATP_MARKER=1``` during the cmake configure step.

2. Install ROCm-Profiler
Installing HIP from the [rocm](http://gpuopen.com/getting-started-with-boltzmann-components-platforms-installation/) pre-built packages, installs the ROCm-Profiler as well.
Alternatively, you can build ROCm-Profiler using the instructions [here](https://github.com/RadeonOpenCompute/ROCm-Profiler#building-the-rocm-profiler).

3. Recompile the target application

Then follow the steps above to collect a marker-enabled trace.


### Using HIP_TRACE_API
You can also print the HIP function strings to stderr using HIP_TRACE_API environment variable. This can also be combined with the more detailed debug information provided
by the HIP_DB switch. For example:
```shell
# Trace to stderr showing being/end of each function (with arguments) + intermediate debug trace during the execution of each function.
HIP_TRACE_API=1 HIP_DB=0x2 ./myHipApp
```

#### Color
Note this trace mode uses colors. "less -r" can handle raw control characters and will display the debug output in proper colors.
You can change the color used for the trace mode with the HIP_TRACE_API_COLOR environment variable. Possible values are None/Red/Green/Yellow/Blue/Magenta/Cyan/White.
None will disable use of color control codes and may be useful when saving the trace file or when a pure text trace is desired.

####


### Using HIP_DB

This flag is primarily targeted to assist HIP development team in the development of the HIP runtime, but in some situations may be useful to HIP application developers as well.
The HIP debug information is designed to print important information during the execution of a HIP API. HIP provides
different color-coded levels of debug informaton:
- api : Print the beginning and end of each HIP API, including the arguments and return codes.
- sync : Print multi-thread and other synchronization debug information.
- copy : Print which engine is doing the copy, which copy flavor is selected, information on source and destination memory.
- mem : Print information about memory allocation - which pointers are allocated, where they are allocated, peer mappings, and more.

DB_MEM format is flags separated by '+' sign, or a hex code for the bitmask. Generally the + format is preferred.
For example:
```shell
HIP_DB=api+copy+mem my-application
HIP_DB=0xF my-application
```
HIP_DB=1 same as HIP_TRACE_API=1




Trace provides quick look at API.
Explain output of
Reference the cookbook example.
Command-line profile.
/// disable profiling at the start of the application you can start CodeXLGpuProfiler with the --startdisabled flag.

Can use strace interleaved with HSA Debug calls .

HIP_PROFILE_API=1
HIP_PROFILE_API=2 : Will show the full API in the trace. This can be useful for lower-level debugging when you want to see all the parameters that are passed to a specific API.

demangle atp

Write how to collect performance counters.
- include how to compute bandwidth for copy and kernel activity.

- How to disable HSA APIs.
- Do I need to use profiler with HSA enabled? Do I need to enable HSA profiling on the command line?

Offline compile, how to visualize.
12 changes: 10 additions & 2 deletions hipify-clang/src/Cuda2Hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,11 @@ struct cuda2hipMap {
cuda2hipRename["cuMemHostRegister_v2"] = {"hipHostRegister", CONV_MEM, API_DRIVER};
cuda2hipRename["cuMemHostUnregister"] = {"hipHostUnregister", CONV_MEM, API_DRIVER};


// Profiler
// unsupported yet by HIP
// cuda2hipRename["cuProfilerInitialize"] = {"hipProfilerInitialize", CONV_OTHER, API_DRIVER};
cuda2hipRename["cuProfilerStart"] = {"hipProfilerStart", CONV_OTHER, API_DRIVER};
cuda2hipRename["cuProfilerStop"] = {"hipProfilerStop", CONV_OTHER, API_DRIVER};

/////////////////////////////// CUDA RT API ///////////////////////////////
// Error API
Expand Down Expand Up @@ -1606,7 +1610,11 @@ class Cuda2HipCallback : public MatchFinder::MatchCallback {
}
}
XStr.clear();
OS << "hipLaunchKernel(HIP_KERNEL_NAME(" << calleeName << "),";
if (calleeName.find(',') != StringRef::npos) {
SmallString<128> tmpData;
calleeName = Twine("HIP_KERNEL_NAME(" + calleeName + ")").toStringRef(tmpData);
}
OS << "hipLaunchKernel(" << calleeName << ",";
const CallExpr *config = launchKernel->getConfig();
DEBUG(dbgs() << "Kernel config arguments:" << "\n");
SourceManager *SM = Result.SourceManager;
Expand Down
13 changes: 11 additions & 2 deletions include/hip/hcc_detail/hip_runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -556,10 +556,19 @@ extern "C" __device__ void __threadfence(void);
*
* @param void
*
* @warning __threadfence_system is a stub and map to no-op, application should set "export HSA_DISABLE_CACHE=1" to disable both L1 and L2 caches.
* @warning __threadfence_system is a stub and map to no-op.
*/
__device__ void __threadfence_system(void) __attribute__((deprecated("Provided for compile-time compatibility, not yet functional")));
__device__ void __threadfence_system(void) __attribute__((deprecated("Provided with workaround configuration, see hip_kernel_language.md for details")));

__device__ unsigned __hip_ds_bpermute(int index, unsigned src);
__device__ float __hip_ds_bpermutef(int index, float src);
__device__ unsigned __hip_ds_permute(int index, unsigned src);
__device__ float __hip_ds_permutef(int index, float src);

__device__ unsigned __hip_ds_swizzle(unsigned int src, int pattern);
__device__ float __hip_ds_swizzlef(float src, int pattern);

__device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl);

// doxygen end Fence Fence
/**
Expand Down
Loading

0 comments on commit 8d40253

Please sign in to comment.