forked from microsoft/onnxruntime
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Fix_Clang_Build.patch
200 lines (188 loc) · 7.65 KB
/
Fix_Clang_Build.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c23746e7f..bc326c8b5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -23,10 +23,10 @@ endif()
set(version 1.1.0)
# Check support for CUDA/HIP in Cmake
-project(composable_kernel VERSION ${version} LANGUAGES CXX)
+project(composable_kernel VERSION ${version} LANGUAGES CXX HIP)
include(CTest)
-find_package(Python3 3.6 COMPONENTS Interpreter REQUIRED)
+find_package(Python3 COMPONENTS Interpreter REQUIRED)
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
@@ -227,27 +227,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
message("CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}")
-## OpenMP
-if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
- # workaround issue hipcc in rocm3.5 cannot find openmp
- set(OpenMP_CXX "${CMAKE_CXX_COMPILER}")
- set(OpenMP_CXX_FLAGS "-fopenmp=libomp -Wno-unused-command-line-argument")
- set(OpenMP_CXX_LIB_NAMES "libomp" "libgomp" "libiomp5")
- set(OpenMP_libomp_LIBRARY ${OpenMP_CXX_LIB_NAMES})
- set(OpenMP_libgomp_LIBRARY ${OpenMP_CXX_LIB_NAMES})
- set(OpenMP_libiomp5_LIBRARY ${OpenMP_CXX_LIB_NAMES})
-else()
- find_package(OpenMP REQUIRED)
-endif()
-
-message("OpenMP_CXX_LIB_NAMES: ${OpenMP_CXX_LIB_NAMES}")
-message("OpenMP_gomp_LIBRARY: ${OpenMP_gomp_LIBRARY}")
-message("OpenMP_pthread_LIBRARY: ${OpenMP_pthread_LIBRARY}")
-message("OpenMP_CXX_FLAGS: ${OpenMP_CXX_FLAGS}")
-
-link_libraries(${OpenMP_gomp_LIBRARY})
-link_libraries(${OpenMP_pthread_LIBRARY})
-
## HIP
find_package(HIP REQUIRED)
# Override HIP version in config.h, if necessary.
@@ -269,12 +248,6 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
message(STATUS "CK_HIP_VERSION_PATCH overriden with ${CK_OVERRIDE_HIP_VERSION_PATCH}")
endif()
message(STATUS "Build with HIP ${HIP_VERSION}")
-link_libraries(hip::device)
-if(CK_hip_VERSION VERSION_GREATER_EQUAL 6.0.23494)
- add_compile_definitions(__HIP_PLATFORM_AMD__=1)
-else()
- add_compile_definitions(__HIP_PLATFORM_HCC__=1)
-endif()
## tidy
include(EnableCompilerWarnings)
@@ -541,11 +514,3 @@ rocm_install(FILES
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
-
-rocm_create_package(
- NAME composablekernel
- DESCRIPTION "High Performance Composable Kernel for AMD GPUs"
- MAINTAINER "MIOpen Kernels Dev Team <[email protected]>"
- LDCONFIG
- HEADER_ONLY
-)
diff --git a/example/ck_tile/01_fmha/generate.py b/example/ck_tile/01_fmha/generate.py
index 51fecd07b..5ed371995 100644
--- a/example/ck_tile/01_fmha/generate.py
+++ b/example/ck_tile/01_fmha/generate.py
@@ -566,7 +566,7 @@ def write_blobs(output_dir : Optional[str], kernel_filter : Optional[str], recei
def list_blobs(output_file : Optional[str], kernel_filter : Optional[str], receipt, mask_impl) -> None:
assert output_file is not None
file_path = Path(output_file)
- with file_path.open('a') as f:
+ with file_path.open('w') as f:
_, kernels = get_blobs(kernel_filter, receipt, mask_impl)
for kernel in kernels:
f.write(str(file_path.parent / GEN_DIR / kernel.filename) + "\n")
diff --git a/include/ck/host_utility/hip_check_error.hpp b/include/ck/host_utility/hip_check_error.hpp
index c0894f1d7..559481fee 100644
--- a/include/ck/host_utility/hip_check_error.hpp
+++ b/include/ck/host_utility/hip_check_error.hpp
@@ -6,19 +6,7 @@
#include <sstream>
#include <hip/hip_runtime.h>
-// To be removed, which really does not tell the location of failed HIP functional call
-inline void hip_check_error(hipError_t x)
-{
- if(x != hipSuccess)
- {
- std::ostringstream ss;
- ss << "HIP runtime error: " << hipGetErrorString(x) << ". "
- << "hip_check_error.hpp"
- << ": " << __LINE__ << "in function: " << __func__;
- throw std::runtime_error(ss.str());
- }
-}
-
+#ifndef HIP_CHECK_ERROR
#define HIP_CHECK_ERROR(retval_or_funcall) \
do \
{ \
@@ -32,3 +20,9 @@ inline void hip_check_error(hipError_t x)
throw std::runtime_error(ostr.str()); \
} \
} while(0)
+#endif
+
+#ifndef hip_check_error
+#define hip_check_error HIP_CHECK_ERROR
+#endif
+
diff --git a/include/ck_tile/core/utility/transpose_vectors.hpp b/include/ck_tile/core/utility/transpose_vectors.hpp
index a164c3f94..293ead89a 100644
--- a/include/ck_tile/core/utility/transpose_vectors.hpp
+++ b/include/ck_tile/core/utility/transpose_vectors.hpp
@@ -11,6 +11,9 @@
namespace ck_tile {
+template <typename... Ts>
+constexpr bool always_false = false;
+
// S: scalar type (or it can be non-scalar type)
// NX: # of vector before transpose
// NY: # of vector after transpose
@@ -117,9 +120,11 @@ struct transpose_vectors
}
else
{
- static_assert(false, "not implemented");
+ static_assert(always_false<S_, number<NX>, number<NY>>, "not implemented");
}
}
};
+
} // namespace ck_tile
+
diff --git a/include/ck_tile/host/hip_check_error.hpp b/include/ck_tile/host/hip_check_error.hpp
index 3acdb4d87..cc26e184f 100644
--- a/include/ck_tile/host/hip_check_error.hpp
+++ b/include/ck_tile/host/hip_check_error.hpp
@@ -8,20 +8,7 @@
#include <stdexcept>
#include <hip/hip_runtime.h>
-namespace ck_tile {
-// To be removed, which really does not tell the location of failed HIP functional call
-CK_TILE_HOST void hip_check_error(hipError_t x)
-{
- if(x != hipSuccess)
- {
- std::ostringstream ss;
- ss << "HIP runtime error: " << hipGetErrorString(x) << ". " << __FILE__ << ": " << __LINE__
- << "in function: " << __func__;
- throw std::runtime_error(ss.str());
- }
-}
-} // namespace ck_tile
-
+#ifndef HIP_CHECK_ERROR
#define HIP_CHECK_ERROR(retval_or_funcall) \
do \
{ \
@@ -34,3 +21,9 @@ CK_TILE_HOST void hip_check_error(hipError_t x)
throw std::runtime_error(ostr.str()); \
} \
} while(0)
+#endif
+
+#ifndef hip_check_error
+#define hip_check_error HIP_CHECK_ERROR
+#endif
+
diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt
index c035e7e56..8c5f36d2e 100644
--- a/library/src/tensor_operation_instance/gpu/CMakeLists.txt
+++ b/library/src/tensor_operation_instance/gpu/CMakeLists.txt
@@ -59,8 +59,14 @@ function(add_instance_library INSTANCE_NAME)
endforeach()
#only continue if there are some source files left on the list
if(ARGN)
+ set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP)
add_library(${INSTANCE_NAME} OBJECT ${ARGN})
+ # Always disable debug symbol and C debug assert due to
+ # - Linker error: ... relocation truncated to fit ..., caused by object files to be linked are too huge.
+ # - https://github.com/ROCmSoftwarePlatform/composable_kernel/issues/622
+ target_compile_options(${INSTANCE_NAME} PRIVATE -g0 -DNDEBUG)
target_compile_features(${INSTANCE_NAME} PUBLIC)
+ target_compile_definitions(${INSTANCE_NAME} PRIVATE "__HIP_PLATFORM_AMD__=1" "__HIP_PLATFORM_HCC__=1")
set_target_properties(${INSTANCE_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
clang_tidy_check(${INSTANCE_NAME})
set(result 0)