Skip to content

Commit

Permalink
Merge branch 'develop' into cin7
Browse files Browse the repository at this point in the history
  • Loading branch information
Fripping authored Aug 12, 2024
2 parents b9ecfef + 7232797 commit ab86a17
Show file tree
Hide file tree
Showing 552 changed files with 12,003 additions and 4,401 deletions.
2 changes: 2 additions & 0 deletions cmake/cinn.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,8 @@ set(PUBLISH_LIBS ON)
if(PUBLISH_LIBS)
set(core_includes
"${core_includes};paddle/cinn/runtime/cuda/cinn_cuda_runtime_source.cuh")
set(core_includes
"${core_includes};paddle/cinn/runtime/hip/cinn_hip_runtime_source.h")
set(core_includes
"${core_includes};paddle/common/flags.h;paddle/utils/test_macros.h")
foreach(header ${core_includes})
Expand Down
1 change: 0 additions & 1 deletion cmake/external/json.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ set(JSON_INCLUDE_DIR ${JSON_PREFIX_DIR}/include)
set(SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/nlohmann_json)
set(SOURCE_INCLUDE_DIR ${SOURCE_DIR}/include)

include_directories(${JSON_INCLUDE_DIR})
include_directories(${SOURCE_INCLUDE_DIR})

set(JSON_BuildTests
Expand Down
10 changes: 5 additions & 5 deletions cmake/phi.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ function(kernel_declare TARGET_LIST)
"(PD_REGISTER_KERNEL|PD_REGISTER_KERNEL_FOR_ALL_DTYPE|PD_REGISTER_KERNEL_FOR_ALL_BACKEND_DTYPE|PD_REGISTER_KERNEL_FOR_ALL_BACKEND_DTYPE_EXCEPT_CUSTOM)\\([ \t\r\n]*[a-z0-9_]*,[[ \\\t\r\n\/]*[a-z0-9_]*]?[ \\\t\r\n]*[a-zA-Z_]*,[ \\\t\r\n]*[A-Z_]*"
first_registry
"${kernel_impl}")
if(DEFINED REDUCE_INFERENCE_LIB_SIZE)
if("${first_registry}" MATCHES ".*_grad,.*")
continue()
endif()
endif()
set(kernel_declare_id "")
while(NOT first_registry STREQUAL "")
string(REPLACE "${first_registry}" "" kernel_impl "${kernel_impl}")
Expand Down Expand Up @@ -162,11 +167,6 @@ function(kernel_declare TARGET_LIST)
endwhile()
# append kernel declare into declarations.h
if(NOT kernel_declare_id STREQUAL "")
if(DEFINED REDUCE_INFERENCE_LIB_SIZE)
if(${kernel_declare_id} MATCHES ".*_grad,.*")
continue()
endif()
endif()
file(APPEND ${kernel_declare_file} "${kernel_declare_id}\n")
endif()
endforeach()
Expand Down
31 changes: 24 additions & 7 deletions paddle/cinn/adt/simplify_value.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,10 @@ struct SimplifyDotUndot {
pre_index_undot = index_undot_value;
}
}
CHECK(pre_index_undot.has_value());
PADDLE_ENFORCE_EQ(
pre_index_undot.has_value(),
true,
phi::errors::InvalidArgument("pre_index_undot should not be null"));
const auto& [index_value, undot_dims] =
pre_index_undot.value()
.Get<IndexUnDotValue<Value, List<DimExpr>>>()
Expand Down Expand Up @@ -195,9 +198,14 @@ struct SimplifyGcdShape {
const auto& iter_values = index_dot_values.Get<List<Value>>();
const auto& undot_dim_values = undot_dims;
const auto& dot_dim_values = dot_dims;
CHECK(IsConstantListAllPositiveInt64(undot_dim_values));
CHECK(IsConstantListAllPositiveInt64(dot_dim_values));

PADDLE_ENFORCE_EQ(IsConstantListAllPositiveInt64(undot_dim_values),
true,
phi::errors::InvalidArgument(
"The undot_dim_values should be all positive int64"));
PADDLE_ENFORCE_EQ(IsConstantListAllPositiveInt64(dot_dim_values),
true,
phi::errors::InvalidArgument(
"The dot_dim_values should be all positive int64"));
const auto& sub_reshape_dim_ranges =
GetSubReshapeDimRanges(undot_dim_values, dot_dim_values);
if (!sub_reshape_dim_ranges.has_value()) {
Expand Down Expand Up @@ -321,7 +329,10 @@ struct SimplifyDotDot {
std::int64_t Product(const List<DimExpr>& dims) {
std::int64_t ret = 1;
for (const auto& dim : *dims) {
CHECK(dim.Has<std::int64_t>());
PADDLE_ENFORCE_EQ(
dim.Has<std::int64_t>(),
true,
phi::errors::InvalidArgument("dim should have std::int64_t"));
ret *= dim.Get<std::int64_t>();
}
return ret;
Expand Down Expand Up @@ -400,7 +411,10 @@ struct SymbolicDim_SimplifyDotUndot {
pre_index_undot = index_undot_value;
}
}
CHECK(pre_index_undot.has_value());
PADDLE_ENFORCE_EQ(
pre_index_undot.has_value(),
true,
phi::errors::InvalidArgument("pre_index_undot should not be null"));
const auto& [index_value, undot_dims] =
pre_index_undot.value()
.Get<IndexUnDotValue<Value, List<DimExpr>>>()
Expand Down Expand Up @@ -447,7 +461,10 @@ struct SymbolicDim_SimplifyDotUndot_DimExpr {
pre_index_undot = index_undot_value;
}
}
CHECK(pre_index_undot.has_value());
PADDLE_ENFORCE_EQ(
pre_index_undot.has_value(),
true,
phi::errors::InvalidArgument("pre_index_undot should not be null"));
const auto& [index_value, undot_dims] =
pre_index_undot.value()
.Get<IndexUnDotValue<Value, List<DimExpr>>>()
Expand Down
4 changes: 4 additions & 0 deletions paddle/cinn/backends/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ if(WITH_CUDA)
list(APPEND srcs cuda_util.cc codegen_cuda_dev.cc)
endif()

# Pull in the HIP backend sources when building with ROCm support.
if(WITH_ROCM)
add_subdirectory(hip)
endif()

# Build the x86 builtin-source helper library when OpenMP is enabled.
if(WITH_OPENMP)
cinn_cc_library(__x86_source_fake_lib SRCS _x86_builtin_source.cc)
endif()
Expand Down
2 changes: 1 addition & 1 deletion paddle/cinn/backends/codegen_cuda_host.cc
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ llvm::Value* CodeGenCudaHost::LowerGPUKernelLauncher(
return function;
}

llvm::Value* CodeGenCudaHost::LowerCUDAKernelCall(const ir::Call* call_ir) {
llvm::Value* CodeGenCudaHost::LowerGPUKernelCall(const ir::Call* call_ir) {
std::vector<llvm::Value*> ll_function_args;
std::transform(f_->arg_begin(),
f_->arg_end(),
Expand Down
25 changes: 19 additions & 6 deletions paddle/cinn/backends/codegen_cuda_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,24 @@ class CodeGenCudaHost : public CodeGenHost {
}

llvm::Value *Visit(const ir::Call *op) override {
  // Dispatch on the default device architecture. On GPU targets
  // (NVIDIA CUDA / Hygon DCU HIP) a call whose name matches the
  // corresponding kernel-launch intrinsic is lowered specially;
  // every other call — and every non-GPU architecture — falls back
  // to the host code generator.
  const auto lower_if_kernel_call = [&](const auto &kernel_intrinsic) {
    if (op->name == kernel_intrinsic) {
      return LowerGPUKernelCall(op);
    }
    return CodeGenHost::Visit(op);
  };
  return common::DefaultDeviceTarget().arch.Match(
      [&](common::UnknownArch) { return CodeGenHost::Visit(op); },
      [&](common::X86Arch) { return CodeGenHost::Visit(op); },
      [&](common::ARMArch) { return CodeGenHost::Visit(op); },
      [&](common::NVGPUArch) {
        return lower_if_kernel_call(runtime::intrinsic::call_cuda_kernel);
      },
      [&](common::HygonDCUArchHIP) {
        return lower_if_kernel_call(runtime::intrinsic::call_hip_kernel);
      });
}

private:
Expand All @@ -66,7 +79,7 @@ class CodeGenCudaHost : public CodeGenHost {
*/
llvm::Value *LowerGPUKernelLauncher(const ir::_LoweredFunc_ *func);

llvm::Value *LowerCUDAKernelCall(const ir::Call *op);
llvm::Value *LowerGPUKernelCall(const ir::Call *op);
};

} // namespace backends
Expand Down
8 changes: 4 additions & 4 deletions paddle/cinn/backends/codegen_device_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,9 @@ void detail::CollectBucketStrategyHostFunctionVisitor::ProcessLoweredFunc(
#endif
},
[&](common::HygonDCUArchHIP) {
PADDLE_THROW(::common::errors::Unimplemented(
"CINN todo: new hardware HygonDCUArchHIP"));
#ifdef CINN_WITH_HIP
shared_mem_bytes = hip::CalculateSharedMemory(func);
#endif
});

VLOG(6) << "Add a call node for func_node->name " << func_node->name << "\n"
Expand All @@ -241,8 +242,7 @@ void detail::CollectBucketStrategyHostFunctionVisitor::ProcessLoweredFunc(
call_kernel = runtime::intrinsic::call_cuda_kernel;
},
[&](common::HygonDCUArchHIP) {
PADDLE_THROW(::common::errors::Unimplemented(
"CINN todo: new hardware HygonDCUArchHIP"));
call_kernel = runtime::intrinsic::call_hip_kernel;
});
ir::Expr call_extern_api =
ir::Call::Make(Void(),
Expand Down
14 changes: 10 additions & 4 deletions paddle/cinn/backends/codegen_device_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
#ifdef CINN_WITH_CUDA
#include "paddle/cinn/backends/codegen_cuda_dev.h"
#endif
#ifdef CINN_WITH_HIP
#include "paddle/cinn/backends/hip/codegen_hip_dev.h"
#endif
#include "paddle/cinn/cinn.h"
#include "paddle/cinn/ir/ir.h"
#include "paddle/cinn/ir/ir_mutator.h"
Expand Down Expand Up @@ -130,8 +133,12 @@ struct CollectHostFunctionVisitor : public ir::IRMutator<> {
#endif
},
[&](common::HygonDCUArchHIP) {
PADDLE_THROW(::common::errors::Unimplemented(
"CINN todo: new hardware HygonDCUArchHIP"));
#ifdef CINN_WITH_HIP
hip::CodeGenHipDevice codegen_dev(
cinn::common::DefaultHygonDcuHipTarget());
codegen_dev.Compile(ir::LoweredFunc(func));
shared_mem_bytes = codegen_dev.GetDynSharedMemOffset();
#endif
});

VLOG(6) << "Add a call node for func->name " << func->name << "\n"
Expand All @@ -152,8 +159,7 @@ struct CollectHostFunctionVisitor : public ir::IRMutator<> {
call_kernel = runtime::intrinsic::call_cuda_kernel;
},
[&](common::HygonDCUArchHIP) {
PADDLE_THROW(::common::errors::Unimplemented(
"CINN todo: new hardware HygonDCUArchHIP"));
call_kernel = runtime::intrinsic::call_hip_kernel;
});

auto call_extern_api =
Expand Down
Loading

0 comments on commit ab86a17

Please sign in to comment.