diff --git a/.gitmodules b/.gitmodules index 0c41450793fc2..3cdbc077ba1ec 100644 --- a/.gitmodules +++ b/.gitmodules @@ -70,8 +70,8 @@ path = third_party/xbyak url = https://github.com/herumi/xbyak.git ignore = dirty -[submodule "third_party/mkldnn"] - path = third_party/mkldnn +[submodule "third_party/onednn"] + path = third_party/onednn url = https://github.com/oneapi-src/oneDNN.git ignore = dirty [submodule "third_party/flashattn"] @@ -118,3 +118,7 @@ path = third_party/cryptopp-cmake url = https://github.com/noloader/cryptopp-cmake.git ignore = dirty +[submodule "third_party/nlohmann_json"] + path = third_party/nlohmann_json + url = https://github.com/nlohmann/json.git + ignore = dirty diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7b100965d8e91..f1dcbd658cb35 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -42,7 +42,7 @@ repos: hooks: - id: copyright_checker name: copyright_checker - entry: python ./tools/codestyle/copyright.hook + entry: python ./tools/codestyle/copyright.py language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|xpu|kps|py|sh)$ exclude: | @@ -67,7 +67,7 @@ repos: - id: clang-format name: clang-format description: Format files with ClangFormat. - entry: bash ./tools/codestyle/clang_format.hook -i + entry: bash ./tools/codestyle/clang_format.sh -i language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|xpu|kps)$ - repo: local @@ -75,7 +75,7 @@ repos: - id: cpplint-cpp-source name: cpplint description: Check C++ code style using cpplint.py. - entry: bash ./tools/codestyle/cpplint_pre_commit.hook + entry: bash ./tools/codestyle/cpplint_pre_commit.sh language: system files: \.(cc|cxx|cpp|cu|h|hpp|hxx)$ args: diff --git a/ISSUE_TEMPLATE.md b/ISSUE_TEMPLATE.md deleted file mode 100644 index 6b2614b101108..0000000000000 --- a/ISSUE_TEMPLATE.md +++ /dev/null @@ -1,14 +0,0 @@ -Thank you for contributing to PaddlePaddle. Submitting an issue is a great help for us. -Both Chinese and English issues are welcome. 
- -It's hard to solve a problem when important details are missing. -Before submitting the issue, look over the following criteria before handing your request in. - -- [ ] Was there a similar issue submitted or resolved before ? You could search issue in the github. -- [ ] Did you retrieve your issue from widespread search engines ? -- [ ] Is my description of the issue clear enough to reproduce this problem? - * If some errors occurred, we need details about `how do you run your code?`, `what system do you use?`, `Are you using GPU or not?`, etc. - * If you use an recording [asciinema](https://asciinema.org/) to show what you are doing to make it happen, that's awesome! We could help you solve the problem more quickly. -- [ ] Is my description of the issue use the github markdown correctly? - * Please use the proper markdown syntaxes for styling all forms of writing, e.g, source code, error information, etc. - * Check out [this page](https://guides.github.com/features/mastering-markdown/) to find out much more about markdown. 
diff --git a/cmake/cinn.cmake b/cmake/cinn.cmake index 05210fd578365..4973c7e3cfb46 100644 --- a/cmake/cinn.cmake +++ b/cmake/cinn.cmake @@ -59,7 +59,7 @@ if(WITH_MKL) add_dependencies(cinn_mklml ${MKLML_PROJECT}) add_definitions(-DCINN_WITH_MKL_CBLAS) endif() -if(WITH_MKLDNN) +if(WITH_ONEDNN) add_definitions(-DCINN_WITH_DNNL) endif() @@ -164,6 +164,8 @@ cinn_cc_library( isl ginac pybind + group_cluster + cinn_op_dialect ${jitify_deps}) add_dependencies(cinnapi GEN_LLVM_RUNTIME_IR_HEADER ZLIB::ZLIB) add_dependencies(cinnapi GEN_LLVM_RUNTIME_IR_HEADER ${core_deps}) @@ -175,9 +177,9 @@ target_link_libraries(cinnapi ${PYTHON_LIBRARIES}) if(WITH_MKL) target_link_libraries(cinnapi cinn_mklml) add_dependencies(cinnapi cinn_mklml) - if(WITH_MKLDNN) + if(WITH_ONEDNN) target_link_libraries(cinnapi ${MKLDNN_LIB}) - add_dependencies(cinnapi ${MKLDNN_PROJECT}) + add_dependencies(cinnapi ${ONEDNN_PROJECT}) endif() endif() @@ -220,21 +222,25 @@ function(gen_cinncore LINKTYPE) schedule_desc_proto absl isl - ginac) + ginac + pybind + group_cluster + cinn_op_dialect + ${jitify_deps}) add_dependencies(${CINNCORE_TARGET} GEN_LLVM_RUNTIME_IR_HEADER ZLIB::ZLIB) add_dependencies(${CINNCORE_TARGET} GEN_LLVM_RUNTIME_IR_HEADER ${core_deps}) target_link_libraries(${CINNCORE_TARGET} op_dialect pir phi) add_dependencies(${CINNCORE_TARGET} op_dialect pir phi) - add_dependencies(${CINNCORE_TARGET} pybind) + # add_dependencies(${CINNCORE_TARGET} pybind) target_link_libraries(${CINNCORE_TARGET} ${PYTHON_LIBRARIES}) if(WITH_MKL) target_link_libraries(${CINNCORE_TARGET} cinn_mklml) add_dependencies(${CINNCORE_TARGET} cinn_mklml) - if(WITH_MKLDNN) + if(WITH_ONEDNN) target_link_libraries(${CINNCORE_TARGET} ${MKLDNN_LIB}) - add_dependencies(${CINNCORE_TARGET} ${MKLDNN_PROJECT}) + add_dependencies(${CINNCORE_TARGET} ${ONEDNN_PROJECT}) endif() endif() @@ -247,16 +253,16 @@ function(gen_cinncore LINKTYPE) ${CUBLAS} ${CUDNN} ${CURAND} - ${CUSOLVER} - ${jitify_deps}) + ${CUSOLVER}) + # ${jitify_deps}) 
if(NVTX_FOUND) target_link_libraries(${CINNCORE_TARGET} ${CUDA_NVTX_LIB}) endif() endif() if(WITH_CUTLASS) - target_link_libraries(cinnapi cutlass) - add_dependencies(cinnapi cutlass) + target_link_libraries(${CINNCORE_TARGET} cutlass) + add_dependencies(${CINNCORE_TARGET} cutlass) endif() endfunction() diff --git a/cmake/external/json.cmake b/cmake/external/json.cmake new file mode 100644 index 0000000000000..b219e60cb9950 --- /dev/null +++ b/cmake/external/json.cmake @@ -0,0 +1,43 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +include(ExternalProject) + +set(JSON_PREFIX_DIR ${THIRD_PARTY_PATH}/nlohmann_json) +set(JSON_INCLUDE_DIR ${JSON_PREFIX_DIR}/include) + +set(SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/nlohmann_json) +set(SOURCE_INCLUDE_DIR ${SOURCE_DIR}/include) + +include_directories(${JSON_INCLUDE_DIR}) + +set(JSON_BuildTests + OFF + CACHE INTERNAL "") + +ExternalProject_Add( + extern_json + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + SOURCE_DIR ${SOURCE_DIR} + PREFIX ${JSON_PREFIX_DIR} + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_IN_SOURCE 1 + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "") + +add_library(json INTERFACE) +#target_include_directories(json PRIVATE ${JSON_INCLUDE_DIR}) +add_dependencies(json extern_json) diff --git a/cmake/external/lite.cmake b/cmake/external/lite.cmake index 515952eae88cd..c350f79945163 100644 --- a/cmake/external/lite.cmake +++ b/cmake/external/lite.cmake @@ -88,7 +88,7 @@ if(NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR) set(LITE_OPTIONAL_ARGS -DWITH_MKL=OFF -DLITE_WITH_CUDA=OFF - -DWITH_MKLDNN=OFF + -DWITH_ONEDNN=OFF -DLITE_WITH_X86=OFF -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON -DLITE_WITH_PROFILE=OFF @@ -141,7 +141,7 @@ if(NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR) set(LITE_OPTIONAL_ARGS -DWITH_MKL=ON -DLITE_WITH_CUDA=OFF - -DWITH_MKLDNN=OFF + -DWITH_ONEDNN=OFF -DLITE_WITH_X86=ON -DLITE_WITH_PROFILE=OFF -DWITH_LITE=OFF diff --git a/cmake/external/mkldnn.cmake b/cmake/external/onednn.cmake similarity index 68% rename from cmake/external/mkldnn.cmake rename to cmake/external/onednn.cmake index 650a2a4196c86..8b1969f87b5a2 100644 --- a/cmake/external/mkldnn.cmake +++ b/cmake/external/onednn.cmake @@ -14,13 +14,13 @@ include(ExternalProject) -set(MKLDNN_PROJECT "extern_mkldnn") -set(MKLDNN_PREFIX_DIR ${THIRD_PARTY_PATH}/mkldnn) -set(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn) +set(ONEDNN_PROJECT "extern_onednn") +set(ONEDNN_PREFIX_DIR ${THIRD_PARTY_PATH}/onednn) +set(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/onednn) 
set(MKLDNN_INC_DIR "${MKLDNN_INSTALL_DIR}/include" - CACHE PATH "mkldnn include directory." FORCE) -set(SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/mkldnn) + CACHE PATH "oneDNN include directory." FORCE) +set(SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/onednn) # Introduce variables: # * CMAKE_INSTALL_LIBDIR @@ -36,28 +36,28 @@ set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/${LIBDIR}") include_directories(${MKLDNN_INC_DIR} -)# For MKLDNN code to include internal headers. +)# For oneDNN code to include internal headers. if(NOT WIN32) - set(MKLDNN_FLAG + set(ONEDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result -Wno-error=array-bounds" ) - set(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value") - set(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}") - set(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}") - set(MKLDNN_CXXFLAG_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") - set(MKLDNN_CFLAG_RELEASE "${CMAKE_C_FLAGS_RELEASE}") + set(ONEDNN_FLAG "${ONEDNN_FLAG} -Wno-unused-result -Wno-unused-value") + set(ONEDNN_CFLAG "${CMAKE_C_FLAGS} ${ONEDNN_FLAG}") + set(ONEDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${ONEDNN_FLAG}") + set(ONEDNN_CXXFLAG_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") + set(ONEDNN_CFLAG_RELEASE "${CMAKE_C_FLAGS_RELEASE}") set(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/${LIBDIR}/libdnnl.so" - CACHE FILEPATH "mkldnn library." FORCE) + CACHE FILEPATH "oneDNN library." 
FORCE) else() - set(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} /EHsc") - set(MKLDNN_CFLAG "${CMAKE_C_FLAGS}") - string(REPLACE "/O2 " "" MKLDNN_CFLAG_RELEASE "${CMAKE_C_FLAGS_RELEASE}") - string(REPLACE "/O2 " "" MKLDNN_CXXFLAG_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") + set(ONEDNN_CXXFLAG "${CMAKE_CXX_FLAGS} /EHsc") + set(ONEDNN_CFLAG "${CMAKE_C_FLAGS}") + string(REPLACE "/O2 " "" ONEDNN_CFLAG_RELEASE "${CMAKE_C_FLAGS_RELEASE}") + string(REPLACE "/O2 " "" ONEDNN_CXXFLAG_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") set(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/bin/mkldnn.lib" - CACHE FILEPATH "mkldnn library." FORCE) + CACHE FILEPATH "oneDNN library." FORCE) endif() if(LINUX) @@ -67,21 +67,21 @@ else() endif() ExternalProject_Add( - ${MKLDNN_PROJECT} + ${ONEDNN_PROJECT} ${EXTERNAL_PROJECT_LOG_ARGS} SOURCE_DIR ${SOURCE_DIR} - DEPENDS ${MKLDNN_DEPENDS} - PREFIX ${MKLDNN_PREFIX_DIR} + DEPENDS ${ONEDNN_DEPENDS} + PREFIX ${ONEDNN_PREFIX_DIR} UPDATE_COMMAND "" #BUILD_ALWAYS 1 CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_FLAGS=${MKLDNN_CXXFLAG} - -DCMAKE_CXX_FLAGS_RELEASE=${MKLDNN_CXXFLAG_RELEASE} + -DCMAKE_CXX_FLAGS=${ONEDNN_CXXFLAG} + -DCMAKE_CXX_FLAGS_RELEASE=${ONEDNN_CXXFLAG_RELEASE} -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} - -DCMAKE_C_FLAGS=${MKLDNN_CFLAG} + -DCMAKE_C_FLAGS=${ONEDNN_CFLAG} -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} - -DCMAKE_C_FLAGS_RELEASE=${MKLDNN_CFLAG_RELEASE} + -DCMAKE_C_FLAGS_RELEASE=${ONEDNN_CFLAG_RELEASE} -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_POSITION_INDEPENDENT_CODE=ON @@ -90,7 +90,7 @@ ExternalProject_Add( CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR} BUILD_BYPRODUCTS ${BUILD_BYPRODUCTS_ARGS}) -message(STATUS "MKLDNN library: ${MKLDNN_LIB}") +message(STATUS "OneDNN library: ${MKLDNN_LIB}") add_definitions(-DPADDLE_WITH_DNNL) # copy the real so.0 lib to install dir # it can be directly contained in wheel or capi @@ -123,21 
+123,21 @@ if(WIN32) COMMAND lib /def:${MKLDNN_INSTALL_DIR}/bin/mkldnn.def /out:${MKLDNN_LIB} /machine:x64 COMMENT "Generate mkldnn.lib manually--->" - DEPENDS ${MKLDNN_PROJECT} + DEPENDS ${ONEDNN_PROJECT} VERBATIM) - add_custom_target(mkldnn_cmd ALL DEPENDS ${MKLDNN_LIB}) + add_custom_target(onednn_cmd ALL DEPENDS ${MKLDNN_LIB}) else() set(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/libdnnl.so.3) add_custom_command( OUTPUT ${MKLDNN_SHARED_LIB} COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_LIB} ${MKLDNN_SHARED_LIB} - DEPENDS ${MKLDNN_PROJECT}) - add_custom_target(mkldnn_cmd ALL DEPENDS ${MKLDNN_SHARED_LIB}) + DEPENDS ${ONEDNN_PROJECT}) + add_custom_target(onednn_cmd ALL DEPENDS ${MKLDNN_SHARED_LIB}) endif() -# generate a static dummy target to track mkldnn dependencies -# for cc_library(xxx SRCS xxx.c DEPS mkldnn) -generate_dummy_static_lib(LIB_NAME "mkldnn" GENERATOR "mkldnn.cmake") +# generate a static dummy target to track onednn dependencies +# for cc_library(xxx SRCS xxx.c DEPS onednn) +generate_dummy_static_lib(LIB_NAME "onednn" GENERATOR "onednn.cmake") -target_link_libraries(mkldnn ${MKLDNN_LIB} ${MKLML_IOMP_LIB}) -add_dependencies(mkldnn ${MKLDNN_PROJECT} mkldnn_cmd) +target_link_libraries(onednn ${MKLDNN_LIB} ${MKLML_IOMP_LIB}) +add_dependencies(onednn ${ONEDNN_PROJECT} onednn_cmd) diff --git a/cmake/external/rocksdb.cmake b/cmake/external/rocksdb.cmake index 072658e54705a..28179cbf1ca20 100644 --- a/cmake/external/rocksdb.cmake +++ b/cmake/external/rocksdb.cmake @@ -14,8 +14,6 @@ include(ExternalProject) -# find_package(jemalloc REQUIRED) - set(ROCKSDB_SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/rocksdb) set(ROCKSDB_TAG 6.19.fb) @@ -32,28 +30,10 @@ set(ROCKSDB_INCLUDE_DIR set(ROCKSDB_LIBRARIES "${ROCKSDB_INSTALL_DIR}/lib/librocksdb.a" CACHE FILEPATH "rocksdb library." 
FORCE) -set(ROCKSDB_COMMON_FLAGS - "-g -pipe -O2 -W -Wall -Wno-unused-parameter -fPIC -fno-builtin-memcmp -fno-omit-frame-pointer" -) -set(ROCKSDB_FLAGS - "-DNDEBUG -DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DOS_LINUX -DROCKSDB_FALLOCATE_PRESENT -DHAVE_PCLMUL -DZLIB -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_PTHREAD_ADAPTIVE_MUTEX -DROCKSDB_BACKTRACE -DROCKSDB_SUPPORT_THREAD_LOCAL -DROCKSDB_USE_RTTI -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_AUXV_GETAUXVAL_PRESENT" -) -set(ROCKSDB_CMAKE_CXX_FLAGS - "${ROCKSDB_COMMON_FLAGS} -DROCKSDB_LIBAIO_PRESENT ${ROCKSDB_FLAGS} -fPIC -I${JEMALLOC_INCLUDE_DIR}" -) -if(NOT WITH_ARM) - set(ROCKSDB_FLAGS "${ROCKSDB_FLAGS} -DHAVE_SSE42") - set(ROCKSDB_CMAKE_CXX_FLAGS - "${ROCKSDB_CMAKE_CXX_FLAGS} -msse -msse4.2 -mpclmul") -endif() -set(ROCKSDB_CMAKE_C_FLAGS - "${ROCKSDB_COMMON_FLAGS} ${ROCKSDB_FLAGS} -DROCKSDB_LIBAIO_PRESENT -fPIC -I${JEMALLOC_INCLUDE_DIR}" -) -include_directories(${ROCKSDB_INCLUDE_DIR}) -set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread") - -set(ROCKSDB_CMAKE_SHARED_LINKER_FLAGS "-ldl -lrt -lz") +set(ROCKSDB_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -DROCKSDB_LIBAIO_PRESENT -I${JEMALLOC_INCLUDE_DIR}") +set(ROCKSDB_SHARED_LINKER_FLAGS "-Wl,--no-as-needed -ldl") if(WITH_ARM) file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/rocksdb/libaio.h.patch @@ -62,6 +42,7 @@ if(WITH_ARM) git checkout -- . 
&& git checkout ${ROCKSDB_TAG} && patch -Nd ${PADDLE_SOURCE_DIR}/third_party/rocksdb/env/ < ${native_src}) endif() + ExternalProject_Add( extern_rocksdb ${EXTERNAL_PROJECT_LOG_ARGS} @@ -76,25 +57,23 @@ ExternalProject_Add( -DWITH_GFLAGS=OFF -DWITH_TESTS=OFF -DWITH_JEMALLOC=ON - -DWITH_BENCHMARK_TOOLS=OFF - -DFAIL_ON_WARNINGS=OFF # For Clang compatibility -DJeMalloc_LIBRARIES=${JEMALLOC_LIBRARIES} -DJeMalloc_INCLUDE_DIRS=${JEMALLOC_INCLUDE_DIR} - -DCMAKE_CXX_FLAGS=${ROCKSDB_CMAKE_CXX_FLAGS} - -DCMAKE_C_FLAGS=${ROCKSDB_CMAKE_C_FLAGS} - -DCMAKE_SHARED_LINKER_FLAGS=${ROCKSDB_CMAKE_SHARED_LINKER_FLAGS} - INSTALL_COMMAND - mkdir -p ${ROCKSDB_INSTALL_DIR}/lib/ && cp - ${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb-build/librocksdb.a - ${ROCKSDB_LIBRARIES} && cp -r ${ROCKSDB_SOURCE_DIR}/include - ${ROCKSDB_INSTALL_DIR}/ + -DWITH_BENCHMARK_TOOLS=OFF + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_CXX_FLAGS=${ROCKSDB_CXX_FLAGS} + -DCMAKE_SHARED_LINKER_FLAGS=${ROCKSDB_SHARED_LINKER_FLAGS} + CMAKE_CACHE_ARGS + -DCMAKE_INSTALL_PREFIX:PATH=${ROCKSDB_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR:PATH=${ROCKSDB_INSTALL_DIR}/lib + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} BUILD_BYPRODUCTS ${ROCKSDB_LIBRARIES}) +add_dependencies(extern_rocksdb snappy extern_jemalloc) add_library(rocksdb STATIC IMPORTED GLOBAL) - -add_dependencies(extern_rocksdb snappy) -add_dependencies(extern_rocksdb extern_jemalloc) set_property(TARGET rocksdb PROPERTY IMPORTED_LOCATION ${ROCKSDB_LIBRARIES}) +include_directories(${ROCKSDB_INCLUDE_DIR}) add_dependencies(rocksdb extern_rocksdb) list(APPEND external_project_dependencies rocksdb) diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index 5b8dd6e0ffe59..1951748f0c126 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -28,6 +28,9 @@ set(XPU_XFA_LIB_NAME "libxpu_flash_attention.so") if(NOT DEFINED XPU_BASE_DATE) set(XPU_BASE_DATE "20240104") endif() +if(NOT DEFINED XPU_XDNN_BASE_DATE) 
+ set(XPU_XDNN_BASE_DATE "20240327") +endif() if(NOT DEFINED XPU_XHPC_BASE_DATE) set(XPU_XHPC_BASE_DATE "20240328") endif() @@ -45,6 +48,10 @@ else() set(XPU_BASE_URL "${XPU_BASE_URL}") endif() +set(XPU_XDNN_BASE_URL + "https://klx-sdk-release-public.su.bcebos.com/xdnn/stable/${XPU_XDNN_BASE_DATE}" +) + set(XPU_XCCL_BASE_URL "https://klx-sdk-release-public.su.bcebos.com/xccl/release/${XPU_XCCL_BASE_VERSION}" ) @@ -105,7 +112,7 @@ set(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) set(XPU_XDNN_URL - "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" + "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) set(XPU_XCCL_URL "${XPU_XCCL_BASE_URL}/${XPU_XCCL_DIR_NAME}.tar.gz" @@ -229,7 +236,7 @@ if(WITH_XPTI) endif() if(WITH_XPU_XHPC) - target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_XBLAS_LIB} + target_link_libraries(xpulib ${XPU_RT_LIB} ${XPU_XBLAS_LIB} ${XPU_API_LIB} ${XPU_XFA_LIB}) endif() diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 5a40695202525..8279f83369ca8 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -212,6 +212,11 @@ if(NOT WIN32) -Wno-error=unused-function # Warnings in Numpy Header. 
-Wno-error=array-bounds # Warnings in Eigen::array ) + + if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set(GPU_COMMON_FLAGS -ccbin=${CMAKE_CXX_COMPILER} ${GPU_COMMON_FLAGS}) + endif() + if(NOT WITH_NV_JETSON AND NOT WITH_ARM AND NOT WITH_SW diff --git a/cmake/generic.cmake b/cmake/generic.cmake index d618c9667de83..4c8819e438a2f 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -598,9 +598,9 @@ function(paddle_test_build TARGET_NAME) ${paddle_test_DEPS} common paddle_gtest_main_new) add_dependencies(${TARGET_NAME} ${paddle_lib} ${paddle_test_DEPS} common paddle_gtest_main_new) - if(WITH_MKLDNN) - target_link_libraries(${TARGET_NAME} mkldnn) - add_dependencies(${TARGET_NAME} mkldnn) + if(WITH_ONEDNN) + target_link_libraries(${TARGET_NAME} onednn) + add_dependencies(${TARGET_NAME} onednn) endif() if(WITH_SHARED_PHI) target_link_libraries(${TARGET_NAME} $) diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index 3005da8aea125..3b81733d279d7 100755 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -114,8 +114,8 @@ function(copy_part_of_third_party TARGET DST) endif() endif() - if(WITH_MKLDNN) - set(dst_dir "${DST}/third_party/install/mkldnn") + if(WITH_ONEDNN) + set(dst_dir "${DST}/third_party/install/onednn") if(WIN32) copy( ${TARGET} diff --git a/cmake/operators.cmake b/cmake/operators.cmake index 1713a2ea71626..f089f6e55b17b 100644 --- a/cmake/operators.cmake +++ b/cmake/operators.cmake @@ -126,7 +126,7 @@ function(register_onednn_kernel TARGET) "The MKLDNN kernel file of ${TARGET} should contains at least one *.*_onednn_op.cc file" ) endif() - if(WITH_MKLDNN) + if(WITH_ONEDNN) cc_library( ${TARGET} SRCS ${onednn_cc_srcs} @@ -237,7 +237,7 @@ function(op_library TARGET) list(APPEND miopen_cu_srcs ${MIOPEN_FILE}.cu) endif() endif() - if(WITH_MKLDNN) + if(WITH_ONEDNN) string(REPLACE "_op" "_onednn_op" MKLDNN_FILE "${TARGET}") if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/onednn/${MKLDNN_FILE}.cc) list(APPEND onednn_cc_srcs 
onednn/${MKLDNN_FILE}.cc) @@ -275,7 +275,7 @@ function(op_library TARGET) list(APPEND cudnn_cu_cc_srcs ${src}) elseif(WITH_GPU AND ${src} MATCHES ".*\\.cu.cc$") list(APPEND cu_cc_srcs ${src}) - elseif(WITH_MKLDNN AND ${src} MATCHES ".*_onednn_op.cc$") + elseif(WITH_ONEDNN AND ${src} MATCHES ".*_onednn_op.cc$") list(APPEND onednn_cc_srcs ${src}) elseif(WITH_XPU AND ${src} MATCHES ".*_op_xpu.cc$") list(APPEND xpu_cc_srcs ${src}) @@ -610,7 +610,7 @@ function(op_library TARGET) endif() # pybind USE_OP_DEVICE_KERNEL for MKLDNN - if(WITH_MKLDNN AND ${onednn_cc_srcs_len} GREATER 0) + if(WITH_ONEDNN AND ${onednn_cc_srcs_len} GREATER 0) # Append first implemented MKLDNN activation operator if(${MKLDNN_FILE} STREQUAL "activation_onednn_op") file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(softplus, MKLDNN);\n") diff --git a/cmake/simd.cmake b/cmake/simd.cmake index 676a25118303c..a305ef4759500 100644 --- a/cmake/simd.cmake +++ b/cmake/simd.cmake @@ -11,12 +11,17 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") set(AVX_FLAG "-mavx") set(AVX2_FLAG "-mavx2") set(AVX512F_FLAG "-mavx512f") + set(Wno_Maybe_Uninitialized "-Wno-maybe-uninitialized") + set(FMA_FLAG "-mfma") elseif(MSVC) set(MMX_FLAG "/arch:MMX") set(SSE2_FLAG "/arch:SSE2") set(SSE3_FLAG "/arch:SSE3") set(AVX_FLAG "/arch:AVX") set(AVX2_FLAG "/arch:AVX2") + set(AVX512F_FLAG "/arch:AVX512") + set(Wno_Maybe_Uninitialized "/wd4701") + set(FMA_FLAG "/arch:AVX2") endif() set(CMAKE_REQUIRED_FLAGS_RETAINED ${CMAKE_REQUIRED_FLAGS}) diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake index 9839f32f83c2b..e90a1c860eb31 100755 --- a/cmake/third_party.cmake +++ b/cmake/third_party.cmake @@ -254,19 +254,19 @@ if(WIN32 OR APPLE) endif() set(WITH_MKLML ${WITH_MKL}) -if(NOT DEFINED WITH_MKLDNN) +if(NOT DEFINED WITH_ONEDNN) if(WITH_MKL AND AVX2_FOUND) - set(WITH_MKLDNN ON) + set(WITH_ONEDNN ON) else() message(STATUS "Do not have AVX2 intrinsics and disabled MKL-DNN.") - set(WITH_MKLDNN OFF) + set(WITH_ONEDNN OFF) endif() 
endif() if(WIN32) if(MSVC) if(MSVC_VERSION LESS 1920) - set(WITH_MKLDNN OFF) + set(WITH_ONEDNN OFF) endif() endif() endif() @@ -303,7 +303,7 @@ if(WITH_CINN) if(WITH_MKL) add_definitions(-DCINN_WITH_MKL_CBLAS) endif() - if(WITH_MKLDNN) + if(WITH_ONEDNN) add_definitions(-DCINN_WITH_DNNL) endif() include(cmake/cinn/version.cmake) @@ -362,9 +362,9 @@ elseif(${CBLAS_PROVIDER} STREQUAL EXTERN_OPENBLAS) list(APPEND third_party_deps extern_openblas) endif() -if(WITH_MKLDNN) - include(external/mkldnn) # download, build, install mkldnn - list(APPEND third_party_deps extern_mkldnn) +if(WITH_ONEDNN) + include(external/onednn) # download, build, install onednn + list(APPEND third_party_deps extern_onednn) endif() include(external/protobuf) # find first, then download, build, install protobuf @@ -372,6 +372,11 @@ if(TARGET extern_protobuf) list(APPEND third_party_deps extern_protobuf) endif() +include(external/json) # find first, then build json +if(TARGET extern_json) + list(APPEND third_party_deps extern_json) +endif() + if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) include(external/python) # find python and python_module include(external/pybind11) # prepare submodule pybind11 diff --git a/cmake/version.cmake b/cmake/version.cmake index 28f022e0afa0e..185418127fdf4 100644 --- a/cmake/version.cmake +++ b/cmake/version.cmake @@ -96,7 +96,7 @@ function(version version_file) "Paddle version: ${PADDLE_VERSION}\n" "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n" "WITH_MKL: ${WITH_MKL}\n" - "WITH_MKLDNN: ${WITH_MKLDNN}\n" + "WITH_ONEDNN: ${WITH_ONEDNN}\n" "WITH_GPU: ${WITH_GPU}\n" "WITH_ROCM: ${WITH_ROCM}\n" "WITH_IPU: ${WITH_IPU}\n") diff --git a/paddle/cinn/ast_gen_ius/ast_gen.cc b/paddle/cinn/ast_gen_ius/ast_gen.cc index 45923624945d0..89cfd3f7d462f 100644 --- a/paddle/cinn/ast_gen_ius/ast_gen.cc +++ b/paddle/cinn/ast_gen_ius/ast_gen.cc @@ -68,8 +68,11 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) { const std::vector& axis = tensor->axis(); const std::vector& 
shape = tensor->shape; size_t axis_len = axis.size(); - CHECK_EQ(shape.size(), axis_len) << "Internal Error: Tensor has different " - "shape and axis length in AstGen"; + PADDLE_ENFORCE_EQ( + shape.size(), + axis_len, + phi::errors::InvalidArgument("Internal Error: Tensor has different " + "shape and axis length in AstGen")); std::vector axis_exprs; for (const auto& a : axis) { axis_exprs.push_back(a); diff --git a/paddle/cinn/frontend/CMakeLists.txt b/paddle/cinn/frontend/CMakeLists.txt index 2ba6ccd12e5bf..f84e4f0cfdc85 100755 --- a/paddle/cinn/frontend/CMakeLists.txt +++ b/paddle/cinn/frontend/CMakeLists.txt @@ -62,7 +62,7 @@ add_subdirectory(paddle) add_subdirectory(decomposer) add_subdirectory(op_mappers) add_subdirectory(pass) -# add_subdirectory(group_cluster) +add_subdirectory(group_cluster) cinn_cc_test(test_op_mapper_registry SRCS op_mapper_registry_test.cc DEPS cinncore) diff --git a/paddle/cinn/frontend/group_cluster/CMakeLists.txt b/paddle/cinn/frontend/group_cluster/CMakeLists.txt index 14cb3c1cfa0e8..3ade895bb2b6b 100644 --- a/paddle/cinn/frontend/group_cluster/CMakeLists.txt +++ b/paddle/cinn/frontend/group_cluster/CMakeLists.txt @@ -3,4 +3,7 @@ gather_srcs(group_cluster_src SRCS common_utils.cc pattern_node.cc add_subdirectory(cluster_policy) -cc_library(group_cluster SRCS ${group_cluster_src}) +cc_library( + group_cluster + SRCS ${group_cluster_src} + DEPS phi) diff --git a/paddle/cinn/frontend/group_cluster/cluster_policy/CMakeLists.txt b/paddle/cinn/frontend/group_cluster/cluster_policy/CMakeLists.txt index c5328419c7f7b..7b86c45ca4dd9 100644 --- a/paddle/cinn/frontend/group_cluster/cluster_policy/CMakeLists.txt +++ b/paddle/cinn/frontend/group_cluster/cluster_policy/CMakeLists.txt @@ -1,3 +1,3 @@ -gather_srcs(group_cluster_src SRCS general_topo_policy.cc policy_manager.cc) - +gather_srcs(group_cluster_src SRCS general_topo_policy.cc policy_manager.cc + relative_judge_policy.cc) add_subdirectory(shardable_axes_policy) diff --git 
a/paddle/cinn/frontend/group_cluster/cluster_policy/general_topo_policy.cc b/paddle/cinn/frontend/group_cluster/cluster_policy/general_topo_policy.cc index 87f8523eda49f..2348701af3d99 100644 --- a/paddle/cinn/frontend/group_cluster/cluster_policy/general_topo_policy.cc +++ b/paddle/cinn/frontend/group_cluster/cluster_policy/general_topo_policy.cc @@ -16,10 +16,28 @@ namespace cinn::frontend::group_cluster::policy { -bool GeneralTopoPolicy::CanFuse(const PatternNodePtr upstream, - const PatternNodePtr downstream) { - // TODO(wuzhanfei) topo policy (if lead to loop) +bool IsDownstreamNode(const PatternNodePtr start, const PatternNodePtr target) { + if (start == target) return true; + for (const auto& down_node : start->downstream_) { + if (IsDownstreamNode(down_node, target)) return true; + } return false; } +bool IsIndirectDownstreamNode(const PatternNodePtr start, + const PatternNodePtr target) { + for (const auto& node : start->downstream_) { + if (node == target) continue; + if (IsDownstreamNode(node, target)) return true; + } + return false; +} + +bool GeneralTopoPolicy::CanFuse(const PatternNodePtr& first, + const PatternNodePtr& second) { + VLOG(4) << "Start GeneralTopoPolicy"; + return !(IsIndirectDownstreamNode(first, second) || + IsIndirectDownstreamNode(second, first)); +} + } // namespace cinn::frontend::group_cluster::policy diff --git a/paddle/cinn/frontend/group_cluster/cluster_policy/general_topo_policy.h b/paddle/cinn/frontend/group_cluster/cluster_policy/general_topo_policy.h index c7cfc23feb89e..ae0801a2fe402 100644 --- a/paddle/cinn/frontend/group_cluster/cluster_policy/general_topo_policy.h +++ b/paddle/cinn/frontend/group_cluster/cluster_policy/general_topo_policy.h @@ -19,7 +19,9 @@ namespace cinn::frontend::group_cluster::policy { class GeneralTopoPolicy final : virtual public Policy { public: - bool CanFuse(const PatternNodePtr upstream, const PatternNodePtr downstream); + bool CanFuse(const PatternNodePtr& upstream, + const PatternNodePtr& 
downstream) override; + std::string Name() { return "GeneralTopoPolicy"; } }; } // namespace cinn::frontend::group_cluster::policy diff --git a/paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.cc b/paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.cc index 3f54bacbd3ecd..edbbe90ec315f 100644 --- a/paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.cc +++ b/paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.cc @@ -17,12 +17,22 @@ namespace cinn::frontend::group_cluster::policy { -bool PolicyManager::CanFuse(const PatternNodePtr upstream, - const PatternNodePtr downstream) { +bool PolicyManager::CanFuse(const PatternNodePtr& upstream, + const PatternNodePtr& downstream) const { for (const auto& policy : policies_) { if (!policy->CanFuse(upstream, downstream)) return false; } return true; } +std::vector PolicyManager::GetFakeReduceIterIdx( + const PatternNodePtr& upstream, const PatternNodePtr& downstream) const { + for (const auto& policy : policies_) { + if (policy->Name() == "RelativeJudgePolicy") { + return policy->GetFakeReduceIterIdx(upstream, downstream); + } + } + return {}; +} + } // namespace cinn::frontend::group_cluster::policy diff --git a/paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.h b/paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.h index f7a2f100add82..414b16f0e725e 100644 --- a/paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.h +++ b/paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.h @@ -20,8 +20,13 @@ namespace cinn::frontend::group_cluster::policy { class Policy { public: - virtual bool CanFuse(const PatternNodePtr upstream, - const PatternNodePtr downstream) = 0; + virtual std::string Name() = 0; + virtual bool CanFuse(const PatternNodePtr& upstream, + const PatternNodePtr& downstream) = 0; + virtual std::vector GetFakeReduceIterIdx( + const PatternNodePtr& upstream, const PatternNodePtr& downstream) { + return {}; + } 
}; using PolicyPtr = std::shared_ptr; @@ -30,7 +35,10 @@ class PolicyManager { public: explicit PolicyManager(const std::vector& policies) : policies_(policies) {} - bool CanFuse(const PatternNodePtr upstream, const PatternNodePtr downstream); + bool CanFuse(const PatternNodePtr& upstream, + const PatternNodePtr& downstream) const; + std::vector GetFakeReduceIterIdx( + const PatternNodePtr& upstream, const PatternNodePtr& downstream) const; private: std::vector policies_; diff --git a/paddle/cinn/frontend/group_cluster/cluster_policy/relative_judge_policy.cc b/paddle/cinn/frontend/group_cluster/cluster_policy/relative_judge_policy.cc new file mode 100644 index 0000000000000..2b14f0c35aa2e --- /dev/null +++ b/paddle/cinn/frontend/group_cluster/cluster_policy/relative_judge_policy.cc @@ -0,0 +1,325 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/cinn/frontend/group_cluster/cluster_policy/relative_judge_policy.h" + +namespace cinn::frontend::group_cluster::policy { + +bool RelativeJudgePolicy::IsDownstreamStmtDependReduceOp( + pir::Operation* reduce, const StmtPattern& downstream) { + const auto& values = GetPatternInputValues(downstream); + for (const auto& value : reduce->results()) { + if (std::find(values.begin(), values.end(), value) != values.end()) { + return true; + } + } + return false; +} + +std::optional RelativeJudgePolicy::GetDownstreamFromCandidate( + const ReducePattern& upstream, + const std::vector& candidates) { + pir::Operation* reduce = upstream.GetReduceOp(); + for (const auto& candidate : candidates) { + if (IsDownstreamStmtDependReduceOp(reduce, candidate)) { + return candidate; + } + } + return {}; +} + +SplitDims SplitReduceInputDimsIfRelatedWithNonReduceAxis( + const ShardableAxesSignature& signature, pir::Operation* op) { + const auto& v = op->operand_source(0); + const auto& input_names = signature.inputs[0].axis_names; + const auto& output_names = signature.outputs[0].axis_names; + std::set output_names_set(output_names.begin(), + output_names.end()); + auto result = SplitDims(); + int idx = 0; + for (const auto& in : input_names) { + if (output_names_set.count(in) == 0) { + result.non_related.emplace_back(v, idx); + } else { + result.related.emplace_back(v, idx); + } + idx += 1; + } + return result; +} + +SplitDims SplitReduceOutputDimsIfRelatedWithNonReduceAxis( + const ShardableAxesSignature& signature, const pir::Operation* op) { + const auto& v = op->result(0); + const auto& input_names = signature.inputs[0].axis_names; + const auto& output_names = signature.outputs[0].axis_names; + std::set input_names_set(input_names.begin(), input_names.end()); + auto result = SplitDims(); + int idx = 0; + for (const auto& name : output_names) { + if (input_names_set.count(name) == 0) { + result.non_related.emplace_back(v, idx); + } else { + 
result.related.emplace_back(v, idx); + } + idx += 1; + } + return result; +} + +bool RelativeJudgePolicy::IsBroadcastEdge( + const std::vector& upstream_out_dims, + const std::vector& downstream_reduce_dims) { + VLOG(4) << "IsBroadcastEdge: upstream_out_dims.size()" + << upstream_out_dims.size(); + VLOG(4) << "IsBroadcastEdge: downstream_reduce_dims.size()" + << downstream_reduce_dims.size(); + + for (const auto& downstream_reduce_dim : downstream_reduce_dims) { + for (const auto& upstream_out_dim : upstream_out_dims) { + VLOG(4) << "upstream_out_dim: " << upstream_out_dim.DebugStr() + << " downstream_reduce_dim: " << downstream_reduce_dim.DebugStr(); + if (IsRelated(upstream_out_dim, downstream_reduce_dim)) { + return false; + } + } + } + + VLOG(4) << "IsBroadcastEdge"; + return true; +} + +bool RelativeJudgePolicy::ReduceTreeGrownCanMerge( + const PatternNodePtr& upstream, const PatternNodePtr& downstream) { + const auto& upstream_tree = + std::get(upstream->stmt_pattern_); + VLOG(4) << "upstream->stmt_pattern_:" + << OpsDebugStr(GetOpsInPattern(upstream_tree)); + const auto& downstream_tree = + std::get(downstream->stmt_pattern_); + VLOG(4) << "downstream->stmt_pattern_" + << OpsDebugStr(GetOpsInPattern(downstream_tree)); + const auto& maybe_downstream_op = GetDownstreamFromCandidate( + upstream_tree.GetRootPattern(), downstream_tree.reduce_patterns_); + int idx = 0; + for (const auto& r_pattern : downstream_tree.reduce_patterns_) { + idx += 1; + VLOG(4) << "downstream_tree.reduce_patterns_" + << "[" << idx << "]" << OpsDebugStr(GetOpsInPattern(r_pattern)); + } + if (!maybe_downstream_op.has_value()) { + VLOG(4) << "can't find candidate from patterns. 
can fuse return false."; + return false; + } + const pir::Value& reduce_out_value = + upstream_tree.GetRootPattern().GetReduceOp()->result(0); + pir::Operation* downstream_reduce_op = + maybe_downstream_op.value().GetReduceOp(); + const auto& split_reduce_dim_result = + SplitReduceInputDimsIfRelatedWithNonReduceAxis( + axes_info_.GetSignature(downstream_reduce_op), downstream_reduce_op); + VLOG(4) << split_reduce_dim_result.DebugStr(); + const auto& upstream_output_dims = GetAllValueDimFromValue(reduce_out_value); + auto res = IsBroadcastEdge(upstream_output_dims, + split_reduce_dim_result.non_related); + VLOG(4) << "ReduceTreeGrownCanMerge: " << res; + return res; +} + +SplitDims RelativeJudgePolicy::SplitDimsWithRelationship( + const std::vector& targets, + const std::vector& related_with) { + VLOG(4) << "SplitDimsWithRelationship"; + auto result = SplitDims(); + bool is_related = false; + for (auto& target_dim : targets) { + is_related = false; + for (auto& related_dim : related_with) { + if (IsRelated(related_dim, target_dim)) is_related = true; + } + if (is_related) { + result.related.push_back(target_dim); + } else { + result.non_related.push_back(target_dim); + } + } + + return result; +} + +bool DimsEqual(const std::vector& first, + const std::vector& second) { + const auto GetDimInfo = + [](const std::vector& dims) -> std::unordered_map { + std::unordered_map result; + for (const auto& dim : dims) { + VLOG(4) << "dim: " << dim.DebugStr(); + size_t value = dim.GetNumericValue(); + VLOG(4) << "value: " << value; + if (result.find(value) == result.end()) { + result[value] = 1; + } else { + result[value] += 1; + } + } + return result; + }; + VLOG(4) << "GetDimInfo"; + const std::unordered_map& first_dims = GetDimInfo(first); + VLOG(4) << "GetDimInfo"; + const std::unordered_map& second_dims = GetDimInfo(second); + if (first_dims.size() != second_dims.size()) return false; + for (const auto& [dim_value, count] : first_dims) { + if (second_dims.find(dim_value) 
== second_dims.end() || + second_dims.at(dim_value) != count) + return false; + } + return true; +} + +bool RelativeJudgePolicy::ReducePlusTrivialCanMerge( + const PatternNodePtr& upstream, const PatternNodePtr& downstream) { + VLOG(4) << "RT can fuse"; + + // const auto& split_reduce_dims_result = + // SplitReduceInputDimsIfRelatedWithNonReduceAxis( + // axes_info_.GetSignature(upstream->sink_op_), upstream->sink_op_); + + // VLOG(4) << split_reduce_dims_result.DebugStr(); + + // const auto& upstream_reduce_dims = split_reduce_dims_result.non_related; + // const auto& upstream_non_reduce_dims = split_reduce_dims_result.related; + + // TODO(wuzhanfei) fix bug in relation that if has multi path in graph + // test_rms_norm can test + + const auto& split_reduce_input_dims_result = + SplitReduceInputDimsIfRelatedWithNonReduceAxis( + axes_info_.GetSignature(upstream->sink_op_), upstream->sink_op_); + VLOG(4) << split_reduce_input_dims_result.DebugStr(); + const auto& upstream_reduce_dims = split_reduce_input_dims_result.non_related; + + const auto& split_reduce_output_dims_result = + SplitReduceOutputDimsIfRelatedWithNonReduceAxis( + axes_info_.GetSignature(upstream->sink_op_), upstream->sink_op_); + VLOG(4) << split_reduce_input_dims_result.DebugStr(); + const auto& upstream_non_reduce_dims = + split_reduce_output_dims_result.related; + // replace codes upside with original design + + const auto& split_trivial_dims_result = SplitDimsWithRelationship( + GetAllValueDimFromValue(downstream->sink_op_->result(0)), + upstream_non_reduce_dims); + + VLOG(4) << split_trivial_dims_result.DebugStr(); + + auto res = + DimsEqual(split_trivial_dims_result.non_related, upstream_reduce_dims); + res = res || IsFlattenDimSmaller(upstream, downstream); + VLOG(4) << "ReducePlusTrivialCanMerge: " << res; + return res; +} + +bool RelativeJudgePolicy::IsFlattenDimSmaller( + const PatternNodePtr& upstream, const PatternNodePtr& downstream) { + const auto& split_reduce_dims_result = + 
SplitReduceInputDimsIfRelatedWithNonReduceAxis( + axes_info_.GetSignature(upstream->sink_op_), upstream->sink_op_); + const auto& upstream_reduce_dims = split_reduce_dims_result.non_related; + const auto& upstream_non_reduce_dims = split_reduce_dims_result.related; + + const auto& split_trivial_dims_result = SplitDimsWithRelationship( + GetAllValueDimFromValue(downstream->sink_op_->result(0)), + upstream_non_reduce_dims); + + VLOG(4) << "IsFlattenDimSmaller: " + << axes_info_.GetSignature(downstream->sink_op_).DebugStr(); + int rank = axes_info_.GetSignature(downstream->sink_op_) + .outputs[0] + .axis_names.size(); + VLOG(4) << "IsFlattenDimSmaller: " << rank << " " + << split_trivial_dims_result.related.size() << " " + << upstream_non_reduce_dims.size(); + bool res = (rank - split_trivial_dims_result.related.size()) <= + upstream_non_reduce_dims.size(); + VLOG(4) << "IsFlattenDimSmaller: " << res; + return res; +} + +bool RelativeJudgePolicy::CanFuse(const PatternNodePtr& upstream, + const PatternNodePtr& downstream) { + if (upstream->IsReduceTree() && downstream->IsTrivial()) { + return ReducePlusTrivialCanMerge(upstream, downstream); + } + if (upstream->IsReduceTree() && downstream->IsReduceTree()) { + return ReduceTreeGrownCanMerge(upstream, downstream); + } + return true; // other case. 
+} + +std::vector RelativeJudgePolicy::GetFakeReduceIterIdx( + const PatternNodePtr& upstream, const PatternNodePtr& downstream) { + if (!upstream->IsReduceTree() || !downstream->IsTrivial()) { + PADDLE_THROW("Illegal Call GetFakeReduceIterIdx"); + } + + // TODO(xiongkun): replace after fix bug in relation that if has multi path in + // graph const auto& split_reduce_dims_result = + // SplitReduceInputDimsIfRelatedWithNonReduceAxis( + // axes_info_.GetSignature(upstream->sink_op_), upstream->sink_op_); + + // const auto& upstream_reduce_dims = split_reduce_dims_result.non_related; + // const auto& upstream_non_reduce_dims = split_reduce_dims_result.related; + // + + const auto& split_reduce_input_dims_result = + SplitReduceInputDimsIfRelatedWithNonReduceAxis( + axes_info_.GetSignature(upstream->sink_op_), upstream->sink_op_); + VLOG(4) << split_reduce_input_dims_result.DebugStr(); + const auto& upstream_reduce_dims = split_reduce_input_dims_result.non_related; + + const auto& split_reduce_output_dims_result = + SplitReduceOutputDimsIfRelatedWithNonReduceAxis( + axes_info_.GetSignature(upstream->sink_op_), upstream->sink_op_); + VLOG(4) << split_reduce_input_dims_result.DebugStr(); + const auto& upstream_non_reduce_dims = + split_reduce_output_dims_result.related; + + // ======================= + + const auto& split_trivial_dims_result = SplitDimsWithRelationship( + GetAllValueDimFromValue(downstream->sink_op_->result(0)), + upstream_non_reduce_dims); + + const auto& trivial_reorder_dims = split_trivial_dims_result.non_related; + + // CHECK(upstream_reduce_dims.size() == trivial_reorder_dims.size() || + // trivial_reorder_dims.size() == 0); + std::unordered_set visited_dims; + std::vector result; + for (auto& reduce_dim : upstream_reduce_dims) { + for (auto& trivial_dim : trivial_reorder_dims) { + if (visited_dims.find(trivial_dim) == visited_dims.end() && + trivial_dim.GetNumericValue() == reduce_dim.GetNumericValue()) { + visited_dims.emplace(trivial_dim); + 
result.emplace_back(trivial_dim.idx_); + break; + } + } + } + VLOG(4) << "FakeReduceIterIdx: " << cinn::utils::Join(result, ", "); + return result; +} + +} // namespace cinn::frontend::group_cluster::policy diff --git a/paddle/cinn/frontend/group_cluster/cluster_policy/relative_judge_policy.h b/paddle/cinn/frontend/group_cluster/cluster_policy/relative_judge_policy.h new file mode 100644 index 0000000000000..e98b68dc893af --- /dev/null +++ b/paddle/cinn/frontend/group_cluster/cluster_policy/relative_judge_policy.h @@ -0,0 +1,301 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once +#include +#include "paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.h" +#include "paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_base.h" +#include "paddle/cinn/frontend/group_cluster/common_utils.h" + +namespace cinn::frontend::group_cluster::policy { + +struct ValueDim { + pir::Value v_; + size_t idx_; + ValueDim(pir::Value v, size_t idx) : v_(v), idx_(idx) {} + ValueDim() = default; + ValueDim(const ValueDim& v) = default; + bool operator==(const ValueDim& v) const { + return (idx_ == v.idx_) && (v_ == v.v_); + } + + size_t GetNumericValue() const { + return v_.type().dyn_cast().dims().at(idx_); + } + + std::string DebugStr() const { + std::ostringstream oss; + oss << "ValueDim: "; + oss << "Index: " << idx_; + oss << ", "; + v_.defining_op()->Print(oss); + return oss.str(); + } +}; + +struct ValueDimHash { + std::size_t operator()(const ValueDim& p) const { + auto h1 = std::hash{}(p.idx_); + auto h2 = std::hash{}(p.v_); + // Mainly for demonstration purposes, i.e. works but is overly simple + // In the real world, use sth. like boost.hash_combine + return h1 ^ (h2 << 1); + } +}; + +using ValueDimRelation = + std::unordered_map, + ValueDimHash>; +// ValueDimRelation[in][out] = True; means f(out) = in is related. 
+ +static std::vector GetAllValueDimFromValue(const pir::Value& v) { + std::vector value_dims; + size_t rank = GetRank(v); + for (size_t i = 0; i < rank; ++i) { + value_dims.emplace_back(v, i); + } + return value_dims; +} + +static std::vector GetAllInputValueDim(pir::Operation* op) { + std::vector value_dims; + for (const auto& v : op->operands()) { + value_dims = ConcatVector(value_dims, GetAllValueDimFromValue(v.source())); + } + return value_dims; +} + +static std::vector GetAllOutputValueDim(pir::Operation* op) { + std::vector value_dims; + for (const auto& v : op->results()) { + value_dims = ConcatVector(value_dims, GetAllValueDimFromValue(v)); + } + return value_dims; +} + +static ValueDimRelation CreateOpRelativenessForElementWise(pir::Operation* op) { + ValueDimRelation res; + for (const auto& v : op->operands()) { + const auto& value_dims = GetAllValueDimFromValue(v.source()); + const auto& out_value_dims = GetAllOutputValueDim(op); + CHECK_EQ(value_dims.size(), out_value_dims.size()); + for (size_t i = 0; i < value_dims.size(); ++i) { + res[value_dims[i]][out_value_dims[i]] = true; + } + } + return res; +} + +static std::vector> GetNonBroadCastDims( + pir::Operation* op) { + std::vector> res; + const auto* shape_analysis = + &pir::ShapeAnalysisManager::Instance().Get(op->GetParentProgram()); + + const auto& broad_cast_value = GetBroadcastOpInputOuputValue(op); + CHECK(broad_cast_value.has_value()); + + const auto& [input_value, output_value] = broad_cast_value.value(); + const int input_rank = GetRank(input_value); + const int output_rank = GetRank(output_value); + CHECK_GE(output_rank, input_rank); + + // Compare axis one by one, from back to front. 
+ // The rule of broadcasting: + // https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/beginner/tensor_cn.html#id7 + for (int i = 1; i <= input_rank; ++i) { + int input_axis = input_rank - i; + int output_axis = output_rank - i; + if (input_axis < 0 || output_axis < 0) break; + if (shape_analysis->IsProductEqual( + input_value, {input_axis}, output_value, {output_axis})) { + res.emplace_back(input_axis, output_axis); + } + } + + return res; +} + +static ValueDimRelation CreateOpRelativenessForBroadcast(pir::Operation* op) { + ValueDimRelation res; + const auto& in_value = op->operand(0).source(); + const auto& out_value = op->result(0); + for (const auto& t : GetNonBroadCastDims(op)) { + res[ValueDim(in_value, t.first)][ValueDim(out_value, t.second)] = true; + } + return res; +} + +static ValueDimRelation CreateOpRelativenessForDefault(pir::Operation* op) { + ValueDimRelation res; + for (const auto& out_dim : GetAllOutputValueDim(op)) { + for (const auto& in_dim : GetAllInputValueDim(op)) { + res[in_dim][out_dim] = true; + } + } + return res; +} + +static ValueDimRelation CreateOpRelativenessForReduce(pir::Operation* op) { + const auto& reduce_axis_idx = GetReduceAxisIdx(op); + ValueDimRelation res; + const size_t input_rank = GetRank(op->operand_source(0)); + int out_idx = 0; + bool keep_dim = GetReduceOpKeepDims(op); + for (int i = 0; i < input_rank; i++) { + if (std::find(reduce_axis_idx.begin(), reduce_axis_idx.end(), i) != + reduce_axis_idx.end()) { + res[ValueDim(op->operand_source(0), i)] + [ValueDim(op->result(0), out_idx)] = true; + out_idx += 1; + } else { + out_idx += keep_dim; + } + } + return res; +} + +static std::optional CreateOpRelativenessForSpecialOps( + pir::Operation* op) { + if (op->name() == "cinn_op.reshape") { + // Special Elementwise. + return CreateOpRelativenessForDefault(op); + } + if (op->name() == "pd_op.reshape") { + // Special Elementwise. 
+ return CreateOpRelativenessForDefault(op); + } + if (op->name() == "cinn_op.generate_shape") { + return CreateOpRelativenessForDefault(op); + } + if (op->name() == "cinn_op.yield_store") { + return CreateOpRelativenessForDefault(op); + } + return {}; +} + +static ValueDimRelation GetSingleOpRelation(pir::Operation* op) { + VLOG(4) << "GetSingleOpRelation for " << op->name(); + const auto& special_result = CreateOpRelativenessForSpecialOps(op); + if (special_result != std::nullopt) { + return special_result.value(); + } + + CHECK(op->num_results() == 1) + << "Now we do not support op with multi outputs: " << op->name(); + const hlir::framework::OpPatternKind kind = GetOpPatternKind(op); + ValueDimRelation result; + if (kind == hlir::framework::kReduction) { + result = CreateOpRelativenessForReduce(op); + } else if (kind == hlir::framework::kElementWise) { + result = CreateOpRelativenessForElementWise(op); + } else if (kind == hlir::framework::kBroadcast) { + result = CreateOpRelativenessForBroadcast(op); + } else { + result = CreateOpRelativenessForDefault(op); + } + return result; +} + +static std::vector> FlattenRelation( + const ValueDimRelation& axes_relation) { + std::vector> res; + for (const auto& in_dim_pair : axes_relation) { + for (const auto& out_dim_pair : in_dim_pair.second) { + res.emplace_back(in_dim_pair.first, out_dim_pair.first); + } + } + return res; +} + +static ValueDimRelation AnalysisIndexExprRelation( + const std::vector& ops) { + ValueDimRelation res; + + for (size_t i = ops.size(); i >= 1; --i) { + pir::Operation* op = ops[i - 1]; + if (op->name() == "cf.yield") continue; + + const auto& value_dim_relation = GetSingleOpRelation(op); + for (const auto& in_out_pair : FlattenRelation(value_dim_relation)) { + for (const auto& out_relation : res[in_out_pair.second]) { + res[in_out_pair.first][out_relation.first] = true; + } + res[in_out_pair.first][in_out_pair.second] = true; + } + } + return res; +} + +struct SplitDims { + std::vector 
related; + std::vector non_related; + + std::string DebugStr() const { + std::stringstream ss; + ss << "SplitDims:\nrelated:\n"; + for (const auto& dim : related) { + ss << dim.DebugStr() << "\n"; + } + ss << "non_related:\n"; + for (const auto& dim : non_related) { + ss << dim.DebugStr() << "\n"; + } + return ss.str(); + } +}; + +class RelativeJudgePolicy final : public Policy { + public: + RelativeJudgePolicy(const std::vector& ops, + const pir::ShapeConstraintIRAnalysis* shape_analysis) + : axes_info_(ops, shape_analysis) { + VLOG(4) << "[relative_judge_policy] Start AnalysisIndexExprRelation."; + index_expr_map_ = AnalysisIndexExprRelation(ops); + VLOG(4) << "[relative_judge_policy] End AnalysisIndexExprRelation."; + } + bool CanFuse(const PatternNodePtr& upstream, + const PatternNodePtr& downstream) override; + + std::string Name() { return "RelativeJudgePolicy"; } + + std::vector GetFakeReduceIterIdx( + const PatternNodePtr& upstream, + const PatternNodePtr& downstream) override; + + bool IsRelated(ValueDim in, ValueDim out) { + return index_expr_map_[in].count(out) == 1; + } + + private: + ValueDimRelation index_expr_map_; + ShardableAxesInfoManager axes_info_; + bool ReduceTreeGrownCanMerge(const PatternNodePtr&, const PatternNodePtr&); + bool IsFlattenDimSmaller(const PatternNodePtr& upstream, + const PatternNodePtr& downstream); + bool ReducePlusTrivialCanMerge(const PatternNodePtr&, const PatternNodePtr&); + SplitDims SplitDimsWithRelationship( + const std::vector& targets, + const std::vector& related_with); + std::optional GetDownstreamFromCandidate( + const ReducePattern& upstream, + const std::vector& candidates); + bool IsDownstreamStmtDependReduceOp(pir::Operation* reduce, + const StmtPattern& downstream); + bool IsBroadcastEdge(const std::vector& upstream_out_dims, + const std::vector&); +}; + +} // namespace cinn::frontend::group_cluster::policy diff --git 
a/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_base.cc b/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_base.cc index ef58985330b70..f14f9b3051de2 100644 --- a/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_base.cc +++ b/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_base.cc @@ -18,8 +18,37 @@ namespace cinn::frontend::group_cluster::policy { +ShardableAxes ShardableAxesInfoManager::ReplaceShardableAxesWithRootName( + const ShardableAxes& axes) { + std::vector names; + for (auto name : axes.axis_names) { + names.push_back(name_union_[name]); + } + return ShardableAxes(names); +} + +ShardableAxesSignature ShardableAxesInfoManager::GetSignature( + pir::Operation* op) { + return op_signature_map_[op]; + // TODO(baizhou) fix broadcast signature and enable here + // auto result = ShardableAxesSignature(); + // auto origin_sig = op_signature_map_[op]; + // for (const auto& axes : origin_sig.inputs) { + // result.inputs.emplace_back(ReplaceShardableAxesWithRootName(axes)); + // } + // for (const auto& axes : origin_sig.outputs) { + // result.outputs.emplace_back(ReplaceShardableAxesWithRootName(axes)); + // } + // return result; +} + +ShardableAxes ShardableAxesInfoManager::GetAxes(pir::Value value) { + return ReplaceShardableAxesWithRootName(value_axes_map_[value]); +} + std::string ShardableAxesInfoManager::GetUniqueName() { static std::atomic counter = 0; + counter += 1; return "D" + std::to_string(counter); } @@ -31,7 +60,7 @@ std::vector CreateNewNamesWithRank(int64_t rank) { return result; } -ShardableAxesSignature CreateDefaultSignature(const pir::Operation* op) { +ShardableAxesSignature CreateDefaultSignature(pir::Operation* op) { ShardableAxesSignature result = ShardableAxesSignature(); for (int i = 0; i < op->num_operands(); ++i) { result.inputs.emplace_back( @@ -44,15 +73,26 @@ ShardableAxesSignature 
CreateDefaultSignature(const pir::Operation* op) { } std::optional CreateSignatureForSpecialOps( - const pir::Operation* op) { + pir::Operation* op) { if (op->isa()) { return CreateDefaultSignature(op); } + if (op->name() == "cinn_op.generate_shape") { + return CreateDefaultSignature(op); + } + if (op->name() == "cinn_op.yield_store") { + return CreateDefaultSignature(op); + } + if (op->name() == "cinn_op.reshape") { + return CreateDefaultSignature(op); + } + if (op->name() == "pd_op.reshape") { + return CreateDefaultSignature(op); + } return std::nullopt; } -ShardableAxesSignature CreateSignatureForReduce( - const pir::Operation* reduce_op) { +ShardableAxesSignature CreateSignatureForReduce(pir::Operation* reduce_op) { CHECK_EQ(reduce_op->num_operands(), 1); CHECK_EQ(reduce_op->num_results(), 1); ShardableAxesSignature result = ShardableAxesSignature(); @@ -67,7 +107,7 @@ ShardableAxesSignature CreateSignatureForReduce( if (std::find(reduce_axis_idx.begin(), reduce_axis_idx.end(), i) != reduce_axis_idx.end()) { if (keep_dim) { - output_axes.emplace_back("constant_1"); + output_axes.emplace_back(ShardableAxesInfoManager::GetUniqueName()); } // else do nothing } else { output_axes.emplace_back(input_axes[i]); @@ -80,7 +120,7 @@ ShardableAxesSignature CreateSignatureForReduce( return result; } -ShardableAxesSignature CreateSignatureForElementWise(const pir::Operation* op) { +ShardableAxesSignature CreateSignatureForElementWise(pir::Operation* op) { ShardableAxesSignature result = ShardableAxesSignature(); int64_t rank = GetRank(op->result(0)); @@ -97,24 +137,58 @@ ShardableAxesSignature CreateSignatureForElementWise(const pir::Operation* op) { return result; } -ShardableAxesSignature CreateSignatureForBroadcast(const pir::Operation* op) { +ShardableAxesSignature CreateSignatureForBroadcast( + pir::Operation* op, const pir::ShapeConstraintIRAnalysis* shape_analysis) { + ShardableAxesSignature result = ShardableAxesSignature(); + const auto& broad_cast_value = 
GetBroadcastOpInputOuputValue(op); - if (!broad_cast_value.has_value()) { - return CreateDefaultSignature(op); + CHECK(broad_cast_value.has_value()); + + const auto& [input_value, output_value] = broad_cast_value.value(); + const int input_rank = GetRank(input_value); + const int output_rank = GetRank(output_value); + CHECK_GE(output_rank, input_rank); + + // Create axes for operands. For expand op, the second operand is the shape of + // output. + for (int i = 0; i < op->num_operands(); ++i) { + result.inputs.emplace_back( + CreateNewNamesWithRank(GetRank(op->operand_source(i)))); + } + + // Create output axes. Compare axis one by one, from back to front. + // The rule of broadcasting: + // https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/beginner/tensor_cn.html#id7 + const auto& input_axis_names = result.inputs[0].axis_names; + std::vector output_axis_names; + for (int i = 1; i <= output_rank; ++i) { + int input_axis = input_rank - i; + int output_axis = output_rank - i; + if ((input_axis >= 0) && + shape_analysis->IsProductEqual( + input_value, {input_axis}, output_value, {output_axis})) { + output_axis_names.emplace_back(input_axis_names[input_axis]); + } else { + output_axis_names.emplace_back(ShardableAxesInfoManager::GetUniqueName()); + } } - const auto& [input, output] = broad_cast_value.value(); - // TODO(wuzhanfei) support broadcast - return CreateDefaultSignature(op); + std::reverse(output_axis_names.begin(), output_axis_names.end()); + result.outputs.emplace_back(ShardableAxes(output_axis_names)); + + return result; } -ShardableAxesSignature CreateShardableSignature(const pir::Operation* op) { +ShardableAxesSignature ShardableAxesInfoManager::CreateShardableSignature( + pir::Operation* op) { auto special_result = CreateSignatureForSpecialOps(op); if (special_result != std::nullopt) { + VLOG(4) << "[ShardableAxesInfoManager] Create Shardable Axes Signature : \n" + << op->name() << " : " << special_result.value().DebugStr(); return 
special_result.value(); } CHECK(op->num_results() == 1) - << "Now we do not support op with multi outputs"; + << "Now we do not support op with multi outputs: " << op->name(); ShardableAxesSignature result; const hlir::framework::OpPatternKind kind = GetOpPatternKind(op); if (kind == hlir::framework::kReduction) { @@ -122,7 +196,7 @@ ShardableAxesSignature CreateShardableSignature(const pir::Operation* op) { } else if (kind == hlir::framework::kElementWise) { result = CreateSignatureForElementWise(op); } else if (kind == hlir::framework::kBroadcast) { - result = CreateSignatureForBroadcast(op); + result = CreateSignatureForBroadcast(op, shape_analysis_); } else { result = CreateDefaultSignature(op); } @@ -132,17 +206,61 @@ ShardableAxesSignature CreateShardableSignature(const pir::Operation* op) { } ShardableAxesInfoManager::ShardableAxesInfoManager( - const std::vector& ops, + const std::vector& ops, const pir::ShapeConstraintIRAnalysis* shape_analysis) : ops_(ops), shape_analysis_(shape_analysis) { for (const auto& op : ops) { + if (op->name() == "cf.yield") continue; op_signature_map_[op] = CreateShardableSignature(op); } - // TODO(wuzhanfei) update value_axes_map_ name_union_ + const auto FindRoot = [&](std::string non_root) { + std::string result = non_root; + while (name_union_[result] != result) { + result = name_union_[result]; + } + return result; + }; + + const auto CombineAxes = [&](const ShardableAxes& root, + const ShardableAxes& non_root) { + CHECK_EQ(root.axis_names.size(), non_root.axis_names.size()); + for (int i = 0; i < non_root.axis_names.size(); i++) { + name_union_[non_root.axis_names[i]] = FindRoot(root.axis_names[i]); + } + }; + + for (const auto& [op, axes_signature] : op_signature_map_) { + for (int i = 0; i < op->num_operands(); ++i) { + auto value = op->operand_source(i); + auto axes = axes_signature.inputs[i]; + if (value_axes_map_.find(value) == value_axes_map_.end()) { + value_axes_map_[value] = axes; + for (auto& axis_name : 
axes.axis_names) { + name_union_[axis_name] = axis_name; + } + } else { + CombineAxes(value_axes_map_[value], axes); + } + } + for (int i = 0; i < op->num_results(); ++i) { + auto value = op->result(i); + auto axes = axes_signature.outputs[i]; + if (value_axes_map_.find(value) == value_axes_map_.end()) { + value_axes_map_[value] = axes; + for (auto& axis_name : axes.axis_names) { + name_union_[axis_name] = axis_name; + } + } else { + CombineAxes(value_axes_map_[value], axes); + } + } + } + + VLOG(4) << NameUnionDebugStr(); } -std::string ShardableAxes::DebugStr() { +std::string ShardableAxes::DebugStr() const { std::stringstream ss; for (const auto& name : axis_names) { ss << name << ", "; @@ -150,7 +268,7 @@ std::string ShardableAxes::DebugStr() { return ss.str(); } -std::string ShardableAxesSignature::DebugStr() { +std::string ShardableAxesSignature::DebugStr() const { std::stringstream ss; ss << "ShardableAxes Signature:\n"; for (int i = 0; i < inputs.size(); i++) { @@ -162,4 +280,27 @@ std::string ShardableAxesSignature::DebugStr() { return ss.str(); } +std::string ShardableAxesInfoManager::NameUnionDebugStr() const { + std::stringstream ss; + ss << "[ShardableAxesInfoManager] NameUnion :\n"; + + std::unordered_map> root_to_sons; + for (const auto& [non_root, root] : name_union_) { + if (root_to_sons.find(root) == root_to_sons.end()) { + root_to_sons[root] = std::vector{non_root}; + } else { + root_to_sons[root].push_back(non_root); + } + } + for (const auto& [root, sons] : root_to_sons) { + ss << "Root " << root << ": "; + for (const auto& son : sons) { + ss << son << ", "; + } + ss << "\n"; + } + + return ss.str(); +} + } // namespace cinn::frontend::group_cluster::policy diff --git a/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_base.h b/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_base.h index c9c341c0b05de..b2795f944f938 100644 --- 
a/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_base.h +++ b/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_base.h @@ -19,32 +19,35 @@ namespace cinn::frontend::group_cluster::policy { struct ShardableAxes { + ShardableAxes() : axis_names({}) {} explicit ShardableAxes(const std::vector& names) : axis_names(names) {} std::vector axis_names; - std::string DebugStr(); + std::string DebugStr() const; }; struct ShardableAxesSignature { std::vector inputs; std::vector outputs; - std::string DebugStr(); + std::string DebugStr() const; }; struct ShardableAxesInfoManager { ShardableAxesInfoManager( - const std::vector& ops, + const std::vector& ops, const pir::ShapeConstraintIRAnalysis* shape_analysis); - ShardableAxesSignature GetSignature(const pir::Operation* op); - ShardableAxes GetAxes(const pir::Value value); + ShardableAxesSignature GetSignature(pir::Operation* op); + ShardableAxes GetAxes(pir::Value value); + ShardableAxesSignature CreateShardableSignature(pir::Operation* op); + ShardableAxes ReplaceShardableAxesWithRootName(const ShardableAxes& axes); static std::string GetUniqueName(); + std::string NameUnionDebugStr() const; private: - const std::vector& ops_; + const std::vector& ops_; const pir::ShapeConstraintIRAnalysis* shape_analysis_; - std::unordered_map - op_signature_map_; + std::unordered_map op_signature_map_; std::unordered_map value_axes_map_; std::unordered_map name_union_; }; diff --git a/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_policy.cc b/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_policy.cc index 36835406267a3..17606d0cf771c 100644 --- a/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_policy.cc +++ b/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_policy.cc @@ -16,10 +16,75 @@ namespace 
cinn::frontend::group_cluster::policy { -bool ShardableAxesPolicy::CanFuse(const PatternNodePtr upstream, - const PatternNodePtr downstream) { - // TODO(wuzhanfei) shardable axes policy +bool ShardableAxesRRFusePolicy::IsDownstreamStmtDependReduceOp( + pir::Operation* reduce, const StmtPattern& downstream) { + const auto& values = GetPatternInputValues(downstream); + for (const auto& value : reduce->results()) { + if (std::find(values.begin(), values.end(), value) != values.end()) { + return true; + } + } return false; } +std::optional +ShardableAxesRRFusePolicy::GetDownstreamFromCandidate( + const ReducePattern& upstream, + const std::vector& candidates) { + pir::Operation* reduce = upstream.GetReduceOp(); + for (const auto& candidate : candidates) { + if (IsDownstreamStmtDependReduceOp(reduce, candidate)) { + return candidate; + } + } + return {}; +} + +static std::set GetReduceAxesName( + const ShardableAxesSignature& signature) { + const auto& input_names = signature.inputs[0].axis_names; + const auto& output_names = signature.outputs[0].axis_names; + std::set res(input_names.begin(), input_names.end()); + for (const auto& n : output_names) { + res.erase(n); + } + return res; +} + +bool ShardableAxesRRFusePolicy::ReduceTreeGrownCanMerge( + const PatternNodePtr& upstream, const PatternNodePtr& downstream) { + if (!upstream->IsReduceTree() || !downstream->IsReduceTree()) { + return false; + } + const auto& upstream_tree = + std::get(upstream->stmt_pattern_); + const auto& downstream_tree = + std::get(downstream->stmt_pattern_); + const auto& maybe_downstream_op = GetDownstreamFromCandidate( + upstream_tree.GetRootPattern(), downstream_tree.reduce_patterns_); + if (!maybe_downstream_op.has_value()) { + return false; + } + const pir::Value& reduce_out_value = + upstream_tree.GetRootPattern().GetReduceOp()->result(0); + pir::Operation* downstream_reduce_op = + maybe_downstream_op.value().GetReduceOp(); + const auto& reduce_names = + 
GetReduceAxesName(axes_info_.GetSignature(downstream_reduce_op)); + for (const auto& n : + axes_info_.GetAxes(downstream_reduce_op->result(0)).axis_names) { + if (reduce_names.count(n) > 0) { + // not meeting the BroadcastEdge condition. + return false; + } + } + return true; +} + +bool ShardableAxesRRFusePolicy::CanFuse(const PatternNodePtr& upstream, + const PatternNodePtr& downstream) { + // TODO(wuzhanfei) shardable axes policy + return ReduceTreeGrownCanMerge(upstream, downstream); +} + } // namespace cinn::frontend::group_cluster::policy diff --git a/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_policy.h b/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_policy.h index 43b0634fcb2b6..1917d2f5af4df 100644 --- a/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_policy.h +++ b/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_policy.h @@ -18,15 +18,24 @@ namespace cinn::frontend::group_cluster::policy { -class ShardableAxesPolicy final : virtual public Policy { +class ShardableAxesRRFusePolicy final : public Policy { public: - ShardableAxesPolicy(const std::vector& ops, - const pir::ShapeConstraintIRAnalysis* shape_analysis) + ShardableAxesRRFusePolicy( + const std::vector& ops, // NOLINT + const pir::ShapeConstraintIRAnalysis* shape_analysis) // NOLINT : axes_info_(ops, shape_analysis) {} - bool CanFuse(const PatternNodePtr upstream, const PatternNodePtr downstream); + bool CanFuse(const PatternNodePtr& upstream, + const PatternNodePtr& downstream) override; + std::string Name() { return "ShardableAxesRRFusePolicy"; } private: + bool ReduceTreeGrownCanMerge(const PatternNodePtr&, const PatternNodePtr&); + std::optional GetDownstreamFromCandidate( + const ReducePattern& upstream, + const std::vector& candidates); ShardableAxesInfoManager axes_info_; + bool IsDownstreamStmtDependReduceOp(pir::Operation* reduce, + 
const StmtPattern& downstream); }; } // namespace cinn::frontend::group_cluster::policy diff --git a/paddle/cinn/frontend/group_cluster/common_utils.cc b/paddle/cinn/frontend/group_cluster/common_utils.cc index 304b05193983e..36280069aca18 100644 --- a/paddle/cinn/frontend/group_cluster/common_utils.cc +++ b/paddle/cinn/frontend/group_cluster/common_utils.cc @@ -24,7 +24,7 @@ size_t GetRank(pir::Value value) { return value.type().dyn_cast().dims().size(); } -std::vector GetReduceAxisIdx(const pir::Operation* reduce_op) { +std::vector GetReduceAxisIdx(pir::Operation* reduce_op) { const size_t input_rank = GetRank(reduce_op->operand_source(0)); const auto& attr_val = reduce_op->attributes().at("dim"); CHECK(attr_val.isa<::pir::ArrayAttribute>()); @@ -39,16 +39,21 @@ std::vector GetReduceAxisIdx(const pir::Operation* reduce_op) { CHECK_LT(axis, input_rank); reduce_axis_idx.push_back(axis); } + VLOG(4) << "GetReduceAxisIdx: " << utils::Join(reduce_axis_idx, ","); return reduce_axis_idx; } -bool GetReduceOpKeepDims(const pir::Operation* reduce_op) { +bool GetReduceOpKeepDims(pir::Operation* reduce_op) { const auto& attr_val = reduce_op->attributes().at("keep_dim"); CHECK(attr_val.isa<::pir::BoolAttribute>()); - return attr_val.dyn_cast<::pir::BoolAttribute>(); + return attr_val.dyn_cast<::pir::BoolAttribute>().data(); } -std::string OpsDebugStr(std::vector ops) { +std::string GetPatternName(const StmtPattern& s) { + return std::visit([](const auto& impl) { return impl.name(); }, s); +} + +std::string OpsDebugStr(std::vector ops) { std::stringstream ss; pir::IrPrinter printer(ss); for (const auto* op : ops) { @@ -59,18 +64,17 @@ std::string OpsDebugStr(std::vector ops) { } std::optional> GetBroadcastOpInputOuputValue( - const pir::Operation* op) { + pir::Operation* op) { auto* mut_op = const_cast(op); if (op->isa()) { auto expand_op = mut_op->dyn_cast(); return std::make_pair(expand_op.x(), expand_op.out()); - } - if (op->isa()) { + } else if (op->isa()) { auto 
broadcast_op = mut_op->dyn_cast(); return std::make_pair(broadcast_op.x(), broadcast_op.out()); + } else { + CHECK(false) << "Unsupported broadcast op: " << op->name(); } - VLOG(4) << "[ShardableAxesSignature] Unsupported Broadcast op: " - << op->name(); return std::nullopt; } } // namespace cinn::frontend::group_cluster @@ -85,12 +89,55 @@ bool IsReducePattern(const StmtPattern& pattern) { return std::holds_alternative(pattern); } +bool IsReduceTreePattern(const StmtPattern& pattern) { + return std::holds_alternative(pattern); +} + +bool IsOpsDependents(const StmtPattern& pattern) { + return std::holds_alternative(pattern); +} + bool IsUnsupportPattern(const StmtPattern& pattern) { return std::holds_alternative(pattern); } -std::vector GetOpsInPattern(const StmtPattern& pattern) { - return std::visit([](const auto& impl) { return impl.ops_; }, pattern); +bool IsReduceTrivialPattern(const StmtPattern& pattern) { + return std::holds_alternative(pattern); +} + +std::unordered_set GetPatternInputValuesIncludeInner( + const StmtPattern& A) { + std::unordered_set result; + for (const auto& op : GetOpsInPattern(A)) { + for (const auto& value : op->operands()) { + result.insert(value.source()); + } + } + return result; +} + +std::unordered_set GetPatternOutputValuesIncludedInner( + const StmtPattern& A) { + std::unordered_set result; + for (const auto& op : GetOpsInPattern(A)) { + for (const auto& value : op->results()) { + result.insert(value); + } + } + return result; +} + +std::unordered_set GetPatternInputValues(const StmtPattern& A) { + auto all_input_values = GetPatternInputValuesIncludeInner(A); + for (const auto& value : GetPatternOutputValuesIncludedInner(A)) { + all_input_values.erase(value); + } + VLOG(4) << "GetPatternInputValues: " << all_input_values.size(); + return all_input_values; +} + +std::vector GetOpsInPattern(const StmtPattern& pattern) { + return std::visit([](const auto& impl) { return impl.ops(); }, pattern); } std::string 
StmtPatternDebugStr(const StmtPattern& stmt) { @@ -102,18 +149,37 @@ std::string StmtPatternDebugStr(const StmtPattern& stmt) { } StmtPattern MergePattern(const StmtPattern& first, const StmtPattern& second) { - std::vector ops = + std::vector ops = MergeVector(GetOpsInPattern(first), GetOpsInPattern(second)); if (IsUnsupportPattern(first) || IsUnsupportPattern(second)) { return UnsupportPattern(ops); - } else if (IsReducePattern(first) || IsReducePattern(second)) { + } else if (IsReduceTreePattern(first) && IsReduceTreePattern(second)) { + const auto& merged = + ConcatVector(std::get(first).reduce_patterns_, + std::get(second).reduce_patterns_); + return ReduceTreePattern( + merged, std::get(second).GetRootPattern()); + } else if (IsReduceTreePattern(first) && IsTrivialPattern(second)) { + return ReduceTreePlusTrivialPattern(std::get(first), + std::get(second)); + } else if (IsTrivialPattern(first) && IsReducePattern(second)) { return ReducePattern(ops); - } else { + } else if (IsTrivialPattern(first) && IsTrivialPattern(second)) { return TrivialPattern(ops); + } else if (IsHorizontalFusionPattern(first) && + IsHorizontalFusionPattern(second)) { + return HorizontalFusionPattern(ops); + } else { + // Not Implementation. 
+ CHECK(false) << "Found not support merge!"; } } -StmtPattern ConvertToStmtPattern(const pir::Operation* op) { +bool IsHorizontalFusionPattern(const StmtPattern& pattern) { + return std::holds_alternative(pattern); +} + +StmtPattern ConvertToStmtPattern(pir::Operation* op) { const auto& kind = GetOpPatternKind(op); if (kind == hlir::framework::kReduction) { return ReducePattern({op}); @@ -126,4 +192,8 @@ StmtPattern ConvertToStmtPattern(const pir::Operation* op) { } } +ReducePattern ToReducePattern(const StmtPattern& second) { + return std::get(second); +} + } // namespace cinn::frontend::group_cluster diff --git a/paddle/cinn/frontend/group_cluster/common_utils.h b/paddle/cinn/frontend/group_cluster/common_utils.h index af2b6c5cde97d..2430facb703e5 100644 --- a/paddle/cinn/frontend/group_cluster/common_utils.h +++ b/paddle/cinn/frontend/group_cluster/common_utils.h @@ -43,42 +43,79 @@ using OpPatternKind = cinn::hlir::framework::OpPatternKind; OpPatternKind GetOpPatternKind(const ::pir::Operation* op); size_t GetRank(pir::Value value); -std::vector GetReduceAxisIdx(const pir::Operation* reduce_op); -bool GetReduceOpKeepDims(const pir::Operation* reduce_op); -std::string OpsDebugStr(std::vector ops); +std::vector GetReduceAxisIdx(pir::Operation* reduce_op); +bool GetReduceOpKeepDims(pir::Operation* reduce_op); +std::string OpsDebugStr(std::vector ops); std::optional> GetBroadcastOpInputOuputValue( - const pir::Operation* op); + pir::Operation* op); } // namespace cinn::frontend::group_cluster namespace cinn::frontend::group_cluster { bool IsTrivialPattern(const StmtPattern& pattern); +bool IsHorizontalFusionPattern(const StmtPattern& pattern); bool IsReducePattern(const StmtPattern& pattern); +bool IsReduceTreePattern(const StmtPattern& pattern); bool IsUnsupportPattern(const StmtPattern& pattern); +bool IsReduceTrivialPattern(const StmtPattern& pattern); template -void ExtendVector(std::vector* first, const std::vector& second) { - std::unordered_set visited = - 
std::unordered_set(first->begin(), first->end()); - for (auto iter = second.begin(); iter != second.end(); iter++) { - if (visited.find(*iter) == visited.end()) { - visited.emplace(*iter); - first->emplace_back(*iter); +void RemoveFromVector(std::vector* vec, T item) { + auto iter = std::find(vec->begin(), vec->end(), item); + if (iter != vec->end()) { + vec->erase(iter); + } +} + +template +std::vector ConcatVector(const std::vector& first, + const std::vector& second) { + std::vector result = first; + result.insert(result.end(), second.begin(), second.end()); + return result; +} + +template +std::vector FilterVector(const std::vector& first, const F& func) { + std::vector result; + for (const auto& i : first) { + if (func(i)) { + result.push_back(i); } } + return result; } template -std::vector MergeVector(const std::vector& first, - const std::vector& second) { - std::vector result = std::vector(first); - ExtendVector(&result, second); +std::set ToSet(const std::vector& input) { + std::set result(input.begin(), input.end()); return result; } -std::vector GetOpsInPattern(const StmtPattern& pattern); +template +bool IsAnyFirstInSecond(const std::vector& first, + const std::vector& second) { + const auto& second_set = ToSet(second); + for (const auto& ele : first) { + if (second_set.count(ele)) { + return true; + } + } + return false; +} + +template +std::vector UniqueVectorBySet(const std::vector& v) { + std::set unique(v.begin(), v.end()); + return std::vector(unique.begin(), unique.end()); +} + +std::vector GetOpsInPattern(const StmtPattern& pattern); std::string StmtPatternDebugStr(const StmtPattern& pattern); StmtPattern MergePattern(const StmtPattern& first, const StmtPattern& second); +ReducePattern ToReducePattern(const StmtPattern& second); +std::string GetPatternName(const StmtPattern& s); -StmtPattern ConvertToStmtPattern(const pir::Operation* op); +StmtPattern ConvertToStmtPattern(pir::Operation* op); +std::unordered_set GetPatternInputValues(const 
StmtPattern& A); } // namespace cinn::frontend::group_cluster diff --git a/paddle/cinn/frontend/group_cluster/group_cluster.h b/paddle/cinn/frontend/group_cluster/group_cluster.h index 950c3b77942a6..5a09b5e2ace95 100644 --- a/paddle/cinn/frontend/group_cluster/group_cluster.h +++ b/paddle/cinn/frontend/group_cluster/group_cluster.h @@ -15,39 +15,69 @@ #pragma once #include "paddle/cinn/frontend/group_cluster/cluster_policy/general_topo_policy.h" +#include "paddle/cinn/frontend/group_cluster/cluster_policy/relative_judge_policy.h" #include "paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_policy.h" #include "paddle/cinn/frontend/group_cluster/pattern_graph.h" namespace cinn::frontend { -inline std::vector> ClusterOps( - const cinn::dialect::GroupOp& group_op) { +inline std::vector ClusterOps( + const std::vector& origin_ops, + bool with_horizontal_fusion = false) { + CHECK_GT(origin_ops.size(), 0); + VLOG(4) << "Start Cluster Ops!"; + VLOG(4) << "Input Group with size " << origin_ops.size() << " :\n" + << group_cluster::OpsDebugStr(origin_ops); + + std::vector outputs; const auto& ops = [&] { - std::vector ops; - for (const auto& op : group_op.GetOperators()) { + std::vector ops; + for (const auto& op : origin_ops) { + if (op->name() == "cf.yield") { // just skip cf.yield. 
+ for (auto& operand : op->operands()) { + outputs.push_back(operand.source()); + } + continue; + } ops.emplace_back(op); } return ops; }(); - VLOG(4) << "Start Cluster Ops!"; - VLOG(4) << "Input Group with size " << ops.size() << " :\n" - << group_cluster::OpsDebugStr(ops); + pir::Program* program = ops.at(0)->GetParentProgram(); const auto* shape_analysis = - &pir::ShapeAnalysisManager::Instance().Get(group_op->GetParentProgram()); + &pir::ShapeAnalysisManager::Instance().Get(program); - auto shardable_axes_policy = - std::make_shared( + // const auto& shardable_axes_policy = + // std::make_shared( + // ops, shape_analysis); + VLOG(4) << "Start Create Policies and PolicyManager!"; + const auto& relative_judge_policy = + std::make_shared( ops, shape_analysis); - auto general_topo_policy = + + const auto& general_topo_policy = std::make_shared(); auto policy_manager = group_cluster::policy::PolicyManager( - {shardable_axes_policy, general_topo_policy}); + {relative_judge_policy, general_topo_policy}); + + auto topo_manager = group_cluster::policy::PolicyManager( + {relative_judge_policy, general_topo_policy}); + + VLOG(4) << "Start Create PatternGraph"; + group_cluster::PatternGraph graph(ops, outputs, policy_manager, topo_manager); + auto result = graph.ClusterOps(with_horizontal_fusion); + + VLOG(4) << "End Cluster Ops! 
result size:" << result.size(); + for (const auto& node : result) { + VLOG(4) << "\n" + << node->DebugStr() << "\n" + << group_cluster::StmtPatternDebugStr(node->stmt_pattern_); + } - group_cluster::PatternGraph graph(ops, policy_manager); - return graph.ClusterOps(); + return result; } } // namespace cinn::frontend diff --git a/paddle/cinn/frontend/group_cluster/pattern.h b/paddle/cinn/frontend/group_cluster/pattern.h index c4d7928c28ba2..03947b312565f 100644 --- a/paddle/cinn/frontend/group_cluster/pattern.h +++ b/paddle/cinn/frontend/group_cluster/pattern.h @@ -14,40 +14,110 @@ #pragma once +#include #include #include +#include "glog/logging.h" #include "paddle/pir/include/core/operation.h" namespace cinn::frontend::group_cluster { +class TrivialPattern; +class ReducePattern; +class ReduceTreePattern; +class ReduceTreePlusTrivialPattern; +class UnsupportPattern; +class HorizontalFusionPattern; + +template +void ExtendVector(std::vector* first, const std::vector& second) { + std::unordered_set visited = + std::unordered_set(first->begin(), first->end()); + for (auto iter = second.begin(); iter != second.end(); iter++) { + if (visited.find(*iter) == visited.end()) { + visited.emplace(*iter); + first->emplace_back(*iter); + } + } +} + +template +std::vector MergeVector(const std::vector& first, + const std::vector& second) { + std::vector result = std::vector(first); + ExtendVector(&result, second); + return result; +} + struct TrivialPattern { - explicit TrivialPattern(const std::vector& ops) + explicit TrivialPattern(const std::vector& ops) : ops_(ops) {} - std::vector ops_; + std::vector ops_; + static std::string name() { return "Trivial"; } + std::vector ops() const { return ops_; } }; struct ReducePattern { - explicit ReducePattern(const std::vector& ops) - : ops_(ops) {} - std::vector ops_; + explicit ReducePattern(const std::vector& ops) : ops_(ops) {} + std::vector ops_; + std::vector ops() const { return ops_; } + pir::Operation* GetReduceOp() const { 
return ops_.back(); } + static std::string name() { return "Reduce"; } +}; + +struct ReduceTreePattern { + explicit ReduceTreePattern(const std::vector& v, + const ReducePattern& root) + : reduce_patterns_(v), root_(root) {} + std::vector reduce_patterns_; + const ReducePattern& GetRootPattern() const { return root_; } + std::vector ops() const { + std::vector result; + for (const auto& reduce_pattern : reduce_patterns_) { + result = MergeVector(result, reduce_pattern.ops()); + } + return result; + } + static std::string name() { return "ReduceTree"; } + + private: + ReducePattern root_; +}; + +struct ReduceTreePlusTrivialPattern { + explicit ReduceTreePlusTrivialPattern(const ReduceTreePattern& tree, + const TrivialPattern& sink_trivial) + : tree(tree), sink_trivial(sink_trivial) {} + ReduceTreePattern tree; + TrivialPattern sink_trivial; + std::vector ops() const { + return MergeVector(tree.ops(), sink_trivial.ops()); + } + static std::string name() { return "ReduceTree+Trivial"; } + std::vector fake_reduce_iter_idx; }; struct UnsupportPattern { - explicit UnsupportPattern(const std::vector& ops) + explicit UnsupportPattern(const std::vector& ops) + : ops_(ops) {} + std::vector ops_; + std::vector ops() const { return ops_; } + static std::string name() { return "Unsupport"; } +}; + +struct HorizontalFusionPattern { + explicit HorizontalFusionPattern(const std::vector& ops) : ops_(ops) {} - std::vector ops_; + std::vector ops_; + std::vector ops() const { return ops_; } + static std::string name() { return "HorizontalFusionPattern"; } }; -// UnsupportedPattern can't fuse with any pattern -// Step 1: T x T|R => T|R TrivialPattern can always fuse with -// downstream Step 2: R x T|R => R Use Shardable Axes Policy -// to judge - -// If we want add MatmulPattern => -// StmtPattern = std::variant; Fusion with different Pattern will have specialized logic -// to Judge, Update policy logic for MatmulPattern -using StmtPattern = - std::variant; +using StmtPattern = 
std::variant; } // namespace cinn::frontend::group_cluster diff --git a/paddle/cinn/frontend/group_cluster/pattern_graph.cc b/paddle/cinn/frontend/group_cluster/pattern_graph.cc index 57d2fd1388f77..bbd49d1b17503 100644 --- a/paddle/cinn/frontend/group_cluster/pattern_graph.cc +++ b/paddle/cinn/frontend/group_cluster/pattern_graph.cc @@ -16,59 +16,124 @@ namespace cinn::frontend::group_cluster { -std::vector> PatternGraph::ClusterOps() { +std::vector PatternGraph::ClusterOps( + bool with_horizontal_fusion) { + VLOG(4) << "[Group Cluster] Initial Condition: " << GraphInfo(); + + VLOG(4) << "[Group Cluster] Start SinkTrivialPattern"; SinkTrivialPattern(); - FuseReducePattern(); - // TODO(wuzhanfei) need sort here, or do not return from all_pattern_nodes_ - std::vector> result; - std::transform(all_pattern_nodes_.begin(), - all_pattern_nodes_.end(), - std::back_inserter(result), - [](const PatternNodePtr node) { return node->GetOps(); }); - return result; + VLOG(4) << "[Group Cluster] After SinkTrivialPattern: " << GraphInfo(); + + // ReducePattern -> ReduceTreePattern + VLOG(4) << "[Group Cluster] Start ReduceLiftReduceTree"; + ReduceLiftReduceTree(); + VLOG(4) << "[Group Cluster] After ReduceLiftReduceTree: " << GraphInfo(); + + // ReduceTreePattern + ReduceTreePattern fusion + VLOG(4) << "[Group Cluster] Start ReduceTreeGrown"; + ReduceTreeGrown(); + VLOG(4) << "[Group Cluster] After ReduceTreeGrown: " << GraphInfo(); + + // ReduceTreePattern + TrivialPattern fusion. + VLOG(4) << "[Group Cluster] Start ReduceTree_Trivial_Fusion"; + ReduceTree_Trivial_Fusion(); + VLOG(4) << "[Group Cluster] After ReduceTree_Trivial_Fusion: " << GraphInfo(); + + // Horizontal fusion. 
+ if (with_horizontal_fusion) { + VLOG(4) << "[Group Cluster] Start HorizontalFusion"; + HorizontalFusion(); + VLOG(4) << "[Group Cluster] After HorizontalFusion: " << GraphInfo(); + } + + return SortByTopoOrder(); } -void PatternGraph::SinkTrivialPattern() { - // TODO(wuzhanfei): need consider Unsupport op here - const auto FindTrivialNode = - [](std::unordered_set all_nodes) -> PatternNodePtr { - for (PatternNodePtr node : all_nodes) { - if (node->IsTrivial() && !node->downstream_.empty()) return node; +std::vector PatternGraph::SortByTopoOrder() { + // sort all_pattern_nodes_ by topo order. + std::vector res; + std::list topo_queue; + std::map degree; + for (const auto& node : all_pattern_nodes_) { + degree[node] = node->upstream_.size(); + if (degree[node] == 0) { + topo_queue.push_back(node); } - return nullptr; - }; - - PatternNodePtr upstream; - while ((upstream = FindTrivialNode(all_pattern_nodes_)) != nullptr) { - std::vector fusion_candidate = upstream->downstream_; - upstream->downstream_.clear(); - for (const auto& downstream : fusion_candidate) { - PatternNodePtr new_node = - std::make_shared(upstream, downstream); - AppendNode(new_node); - RemoveNode(downstream); + } + while (!topo_queue.empty()) { + PatternNodePtr node = topo_queue.front(); + topo_queue.pop_front(); + res.push_back(node); + for (const auto& downstream_op : node->downstream_) { + degree[downstream_op] = degree[downstream_op] - 1; + if (degree[downstream_op] == 0) { + topo_queue.push_back(downstream_op); + } } - RemoveNode(upstream); } + return res; } -void PatternGraph::FuseReducePattern() { - // TODO(wuzhanfei) reduce fusion, similar with implementation in backend +void PatternGraph::SinkTrivialPattern() { + GraphTransformer< + NodePattern, + And>, + IsNotOutputNodeMatcher>, + MergeTrivialPatternOperation>(this); } -PatternGraph::PatternGraph(const std::vector& ops, - const policy::PolicyManager policy_manager) - : policy_manager_(policy_manager) { - std::unordered_map 
op_to_node_map; +void PatternGraph::ReduceLiftReduceTree() { + GraphTransformer< + NodePattern, + And, StmtPatternGraphMatcher>, + LiftReduceToReduceTreeOperation>(this); +} + +void PatternGraph::HorizontalFusion() { + GraphTransformer, + LiftToHorizontalFusionPatternOperation>(this); + + GraphTransformer(this); +} + +void PatternGraph::ReduceTreeGrown() { + GraphTransformer, + MergeReduceTreeOperation>(this); +} + +void PatternGraph::ReduceTree_Trivial_Fusion() { + GraphTransformer< + NodePattern, + And, + MergeReduceTreeAndTrivialOperation>(this); +} + +PatternGraph::PatternGraph(const std::vector& ops, + const std::vector& outputs, + const policy::PolicyManager policy_manager, + const policy::PolicyManager topo_manager) + : policy_manager_(policy_manager), + topo_manager_(topo_manager), + outputs_(outputs) { + std::unordered_map op_to_node_map; + + VLOG(4) << "len(outputs) = " << outputs_.size(); + for (const auto& v : outputs) { + VLOG(4) << "output is" << OpsDebugStr({v.defining_op()}); + } - for (int i = 0; i < ops.size(); ++i) { - PatternNodePtr node = std::make_shared(ops[i]); - op_to_node_map[ops[i]] = node; + for (const auto& op : ops) { + PatternNodePtr node = std::make_shared(op); + op_to_node_map[op] = node; all_pattern_nodes_.emplace(node); - node->sink_op_ = ops[i]; + node->sink_op_ = op; } - for (const pir::Operation* op : ops) { + for (pir::Operation* op : ops) { PatternNodePtr cur_node = op_to_node_map[op]; // add upstream nodes @@ -77,7 +142,6 @@ PatternGraph::PatternGraph(const std::vector& ops, if (op_to_node_map.find(input_op) != op_to_node_map.end()) { PatternNodePtr upstream_node = op_to_node_map[input_op]; cur_node->upstream_.push_back(upstream_node); - upstream_node->downstream_.push_back(cur_node); } } @@ -91,44 +155,81 @@ PatternGraph::PatternGraph(const std::vector& ops, if (op_to_node_map.find(output_op) != op_to_node_map.end()) { PatternNodePtr downstream_node = op_to_node_map[output_op]; 
cur_node->downstream_.push_back(downstream_node); - downstream_node->upstream_.push_back(cur_node); } } } - - if (cur_node->upstream_.empty()) { - entrance_nodes_.emplace(cur_node); - } - - if (cur_node->downstream_.empty()) { - exit_nodes_.emplace(cur_node); - } } VLOG(4) << "PatternGraph Created, pattern node size: " << all_pattern_nodes_.size(); } -void PatternGraph::RemoveNode(PatternNodePtr node) { +void PatternGraph::RemoveNode(const PatternNodePtr& node) { + VLOG(4) << "Start Remove: " << node; if (all_pattern_nodes_.find(node) != all_pattern_nodes_.end()) { + VLOG(4) << "Removed! "; all_pattern_nodes_.erase(node); } - if (entrance_nodes_.find(node) != entrance_nodes_.end()) { - entrance_nodes_.erase(node); + + for (PatternNodePtr& upstream : node->upstream_) { + RemoveFromVector(&upstream->downstream_, node); } - if (exit_nodes_.find(node) != exit_nodes_.end()) { - exit_nodes_.erase(node); + + for (PatternNodePtr& downstream : node->downstream_) { + RemoveFromVector(&downstream->upstream_, node); } } -void PatternGraph::AppendNode(PatternNodePtr node) { +void PatternGraph::AppendNode(const PatternNodePtr& node) { all_pattern_nodes_.emplace(node); - if (node->upstream_.empty()) { - entrance_nodes_.emplace(node); - } - if (node->downstream_.empty()) { - exit_nodes_.emplace(node); +} + +std::string PatternGraph::GraphInfo() const { + std::stringstream ss; + ss << "\n========= GraphInfo ==========="; + for (const auto& v : all_pattern_nodes_) { + ss << "\n" << v->DebugStr(); + ss << "\n IsOutput: " << IsOutputNodeMatcher()(*this, v); } + ss << "\n==============================="; + return ss.str(); } +PatternNodePtr PatternGraph::MergeNode(const PatternNodePtr& upstream, + const PatternNodePtr& downstream) { + PatternNodePtr merged_node = + std::make_shared(upstream, downstream); + + // deal with the reference. 
+ ExtendVector(&merged_node->upstream_, upstream->upstream_); + ExtendVector(&merged_node->upstream_, downstream->upstream_); + RemoveFromVector(&merged_node->upstream_, upstream); + + ExtendVector(&merged_node->downstream_, upstream->downstream_); + ExtendVector(&merged_node->downstream_, downstream->downstream_); + RemoveFromVector(&merged_node->downstream_, downstream); + + for (const auto& upstream_node : merged_node->upstream_) { + upstream_node->downstream_.push_back(merged_node); + RemoveFromVector(&upstream_node->downstream_, upstream); + RemoveFromVector(&upstream_node->downstream_, downstream); + } + for (const auto& downstream_node : merged_node->downstream_) { + downstream_node->upstream_.push_back(merged_node); + RemoveFromVector(&downstream_node->downstream_, upstream); + RemoveFromVector(&downstream_node->downstream_, downstream); + } + + const auto vec_unique = [](const std::vector& vec) { + auto set = std::unordered_set(vec.begin(), vec.end()); + return set.size() == vec.size(); + }; + + CHECK(vec_unique(merged_node->upstream_)); + CHECK(vec_unique(merged_node->downstream_)); + + // deal with the graph storage. 
+ AppendNode(merged_node); + return merged_node; +} } // namespace cinn::frontend::group_cluster diff --git a/paddle/cinn/frontend/group_cluster/pattern_graph.h b/paddle/cinn/frontend/group_cluster/pattern_graph.h index cc3c811eba519..9f151f25558c7 100644 --- a/paddle/cinn/frontend/group_cluster/pattern_graph.h +++ b/paddle/cinn/frontend/group_cluster/pattern_graph.h @@ -14,31 +14,347 @@ #pragma once #include "paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.h" +#include "paddle/cinn/frontend/group_cluster/cluster_policy/relative_judge_policy.h" #include "paddle/cinn/frontend/group_cluster/common_utils.h" #include "paddle/cinn/frontend/group_cluster/pattern_node.h" namespace cinn::frontend::group_cluster { +struct PatternNodePtrHash { + size_t operator()(const PatternNodePtr& node) const { + return std::hash()(node.get()); + } +}; + +struct PatternNodePtrCompare { + bool operator()(const std::shared_ptr& a, + const std::shared_ptr& b) const { + return a.get() == b.get(); + } +}; + +using PatternNodePtrSet = std:: + unordered_set; + class PatternGraph { public: - PatternGraph(const std::vector& ops, - const policy::PolicyManager policy_manager); + PatternGraph(const std::vector& ops, + const std::vector& outputs, + const policy::PolicyManager policy_manager, + const policy::PolicyManager topo_manager); - std::vector> ClusterOps(); + std::vector ClusterOps(bool with_horizontal_fusion = false); private: void SinkTrivialPattern(); + void HorizontalFusion(); void FuseReducePattern(); + void ReduceLiftReduceTree(); + void ReduceTreeGrown(); + void ReduceTree_Trivial_Fusion(); - void RemoveNode(PatternNodePtr node); - void AppendNode(PatternNodePtr node); + void RemoveNode(const PatternNodePtr& node); + void AppendNode(const PatternNodePtr& node); + std::string GraphInfo() const; + PatternNodePtr MergeNode(const PatternNodePtr& upstream, + const PatternNodePtr& downstream); + std::vector SortByTopoOrder(); - private: - std::unordered_set 
all_pattern_nodes_; - std::unordered_set entrance_nodes_; - std::unordered_set exit_nodes_; + friend class IsOutputNodeMatcher; + friend class IsNotOutputNodeMatcher; + friend class CanFuseReduceTreeAndTrivialMatcher; + friend class CanFuseReduceTreeMatcher; + + friend class MergeTrivialPatternOperation; + friend class LiftReduceToReduceTreeOperation; + friend class MergeReduceTreeOperation; + friend class MergeReduceTreeAndTrivialOperation; + friend class HorizontalFusionOperation; + friend class LiftToHorizontalFusionPatternOperation; + + public: + PatternNodePtrSet all_pattern_nodes_; + std::vector outputs_; + policy::PolicyManager policy_manager_; + policy::PolicyManager topo_manager_; +}; + +// PatternGraphFusionOperation := (GraphMatcher, GraphOperation) +// SearchAlgorithm := NodePattern | EdgePattern | GraphMatcher +// GraphOperation := Merge2Node | SplitNode | SplitAllAndMergeDownstream + +struct NodePattern {}; +struct EdgePattern {}; +struct GraphPattern {}; // not implemented. +struct NodePairPattern {}; // not implemented. 
+ +template +struct SearchAlgorithm {}; + +template +struct SearchAlgorithm { + PatternGraph* graph_; + PatternNodePtrSet visited_nodes; + + explicit SearchAlgorithm(PatternGraph* graph) { + VLOG(4) << "Create NodePattern algorithm."; + graph_ = graph; + } - const policy::PolicyManager policy_manager_; + PatternNodePtr FindMatchedNode() { + for (PatternNodePtr iter_node : graph_->all_pattern_nodes_) { + if (GraphMatcher()(*graph_, iter_node) && + !visited_nodes.count(iter_node)) { + visited_nodes.insert(iter_node); + VLOG(4) << "Find Matched Node: " << iter_node; + return iter_node; + } + } + VLOG(4) << "Can't find matched node any more."; + return nullptr; + } + + void operator()() { + while (true) { + PatternNodePtr node = FindMatchedNode(); + if (node == nullptr) { + break; + } + GraphOperation()(graph_, node); + } + } +}; + +template +struct SearchAlgorithm { + PatternGraph* graph_; + std::set> visited_node_pair; + explicit SearchAlgorithm(PatternGraph* graph) { + VLOG(4) << "Create NodePairPattern algorithm."; + graph_ = graph; + } + std::optional> FindMatchedPair() { + for (PatternNodePtr i : graph_->all_pattern_nodes_) { + for (PatternNodePtr j : graph_->all_pattern_nodes_) { + if (i == j) continue; + const auto& pair = std::make_pair(i, j); + if (GraphMatcher()(*graph_, i, j) && !visited_node_pair.count(pair)) { + visited_node_pair.insert(pair); + VLOG(4) << "Find Matched Node Pair: (" << i << ", " << j << ")"; + return pair; + } + } + } + VLOG(4) << "Can't find matched node any more."; + return {}; + } + void operator()() { + while (true) { + const auto& node = FindMatchedPair(); + if (!node.has_value()) break; + const auto& [i, j] = node.value(); + GraphOperation()(graph_, i, j); + } + } +}; + +// Operation + +struct MergeReduceTreeOperation { + void operator()(PatternGraph* graph, PatternNodePtr node) { + CHECK_EQ(node->downstream_.size(), 1); + auto downstream = node->downstream_.at(0); + auto merged_node = graph->MergeNode(node, downstream); + 
graph->RemoveNode(downstream); + graph->RemoveNode(node); + VLOG(4) << "MergeReduceTreeOperation: \nupstream " << node->DebugStr() + << "\ndownstream " << downstream->DebugStr() << "\nmerged " + << merged_node->DebugStr(); + } +}; + +struct MergeReduceTreeAndTrivialOperation { + void operator()(PatternGraph* graph, PatternNodePtr node) { + CHECK_EQ(node->downstream_.size(), 1); + auto downstream = node->downstream_.at(0); + auto fake_reduce_iter_idx = + graph->policy_manager_.GetFakeReduceIterIdx(node, downstream); + PatternNodePtr merged_node = graph->MergeNode(node, downstream); + std::get(merged_node->stmt_pattern_) + .fake_reduce_iter_idx = fake_reduce_iter_idx; + graph->RemoveNode(downstream); + graph->RemoveNode(node); + VLOG(4) << "MergeReduceTreeAndTrivialOperation: \nupstream " + << node->DebugStr() << "\ndownstream " << downstream->DebugStr() + << "\nmerged " << merged_node->DebugStr(); + } }; +struct LiftReduceToReduceTreeOperation { + void operator()(PatternGraph* graph, PatternNodePtr node) { + const auto& reduce_pattern = ToReducePattern(node->stmt_pattern_); + node->stmt_pattern_ = ReduceTreePattern({reduce_pattern}, reduce_pattern); + VLOG(4) << "LiftReduceToReduceTreeOperation: \nnode " << node->DebugStr(); + } +}; + +struct MergeTrivialPatternOperation { + void operator()(PatternGraph* graph, PatternNodePtr upstream) { + std::vector fusion_candidate = upstream->downstream_; + upstream->downstream_.clear(); + for (const auto& downstream : fusion_candidate) { + if (downstream->IsReduce() || downstream->IsTrivial()) { + auto merged_node = graph->MergeNode(upstream, downstream); + graph->RemoveNode(downstream); + VLOG(4) << "MergeTrivialPatternOperation: \nupstream " + << upstream->DebugStr() << "\ndownstream " + << downstream->DebugStr() << "\nmerged " + << merged_node->DebugStr(); + } else { + upstream->downstream_.push_back(downstream); + } + } + if (upstream->downstream_.empty()) { + graph->RemoveNode(upstream); + } + } +}; + +struct 
LiftToHorizontalFusionPatternOperation { + void operator()(PatternGraph* graph, PatternNodePtr i) { + i->stmt_pattern_ = + HorizontalFusionPattern(GetOpsInPattern(i->stmt_pattern_)); + } +}; + +// Matcher + +template +struct AlwaysTrue { + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return true; + } +}; + +template +struct StmtPatternGraphMatcher { + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return GetPatternName(node->stmt_pattern_) == StmtPattern::name(); + } +}; + +struct CanFuseRxTMatcher { + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return (node->IsReduceTree() && !node->downstream_.empty() && + node->downstream_.at(0)->IsTrivial()); + } +}; + +struct CanFuseReduceTreeMatcher { + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return StmtPatternGraphMatcher()(graph, node) && + !node->downstream_.empty() && + node->downstream_.at(0)->IsReduceTree() && + graph.policy_manager_.CanFuse(node, node->downstream_.at(0)); + } +}; + +struct CanFuseReduceTreeAndTrivialMatcher { + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return StmtPatternGraphMatcher()(graph, node) && + !node->downstream_.empty() && node->downstream_.at(0)->IsTrivial() && + graph.policy_manager_.CanFuse(node, node->downstream_.at(0)); + } +}; + +struct HorizontalFusionConstrain { + bool operator()(const PatternGraph& graph, + const PatternNodePtr& first, + const PatternNodePtr& second) { + if (!StmtPatternGraphMatcher()(graph, first)) { + return false; + } + if (!StmtPatternGraphMatcher()(graph, second)) { + return false; + } + const auto& first_dim = first->sink_op_->result(0) + .type() + .dyn_cast() + .dims(); + const auto& second_dim = second->sink_op_->result(0) + .type() + .dyn_cast() + .dims(); + return graph.topo_manager_.CanFuse(first, second) && + first_dim == second_dim; + } +}; + +struct HorizontalFusionOperation { + void 
operator()(PatternGraph* graph, + const PatternNodePtr& i, + const PatternNodePtr& j) { + CHECK(GetPatternName(i->stmt_pattern_) == HorizontalFusionPattern::name()); + CHECK(GetPatternName(j->stmt_pattern_) == HorizontalFusionPattern::name()); + graph->MergeNode(i, j); + graph->RemoveNode(i); + graph->RemoveNode(j); + } +}; + +struct NonSinkNodeMatcher { + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return !node->downstream_.empty(); + } +}; + +struct IsOutputNodeMatcher { + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + bool res = IsAnyFirstInSecond(node->sink_op_->results(), graph.outputs_); + return res; + } +}; + +struct IsNotOutputNodeMatcher { + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + bool res = !IsOutputNodeMatcher()(graph, node); + return res; + } +}; + +template +struct DownstreamSmallerThan { + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return node->downstream_.size() < N; + } +}; + +template +struct And { + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return A()(graph, node) && B()(graph, node); + } +}; + +template +struct Or { + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return A()(graph, node) || B()(graph, node); + } +}; + +template +struct Not { + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return !A()(graph, node); + } +}; + +template +void GraphTransformer(PatternGraph* graph) { + VLOG(4) << "Start GraphTransformer..."; + auto alog = SearchAlgorithm(graph); + alog(); +} + } // namespace cinn::frontend::group_cluster diff --git a/paddle/cinn/frontend/group_cluster/pattern_node.cc b/paddle/cinn/frontend/group_cluster/pattern_node.cc index 50c287e679bb4..342fc36847229 100644 --- a/paddle/cinn/frontend/group_cluster/pattern_node.cc +++ b/paddle/cinn/frontend/group_cluster/pattern_node.cc @@ -16,57 +16,42 @@ namespace 
cinn::frontend::group_cluster { -PatternNode::PatternNode(const pir::Operation* op) +PatternNode::PatternNode(pir::Operation* op) : sink_op_(op), stmt_pattern_(ConvertToStmtPattern(op)) {} PatternNode::PatternNode(PatternNodePtr fused_up_node, PatternNodePtr fused_down_node) : sink_op_(fused_down_node->sink_op_), stmt_pattern_(MergePattern(fused_up_node->stmt_pattern_, - fused_down_node->stmt_pattern_)) { - const auto FindFromVector = - [](std::vector vec, - PatternNodePtr item) -> std::vector::iterator { - return std::find(vec.begin(), vec.end(), item); - }; + fused_down_node->stmt_pattern_)) {} - ExtendVector(&upstream_, fused_up_node->upstream_); - ExtendVector(&upstream_, fused_down_node->upstream_); - - upstream_.erase(FindFromVector(upstream_, fused_up_node)); - - ExtendVector(&downstream_, fused_up_node->downstream_); - ExtendVector(&downstream_, fused_down_node->downstream_); - downstream_.erase(FindFromVector(downstream_, fused_down_node)); - - std::vector::iterator iter; - for (const auto& upstream_node : upstream_) { - iter = FindFromVector(upstream_node->downstream_, fused_up_node); - if (iter != upstream_node->downstream_.end()) { - upstream_node->downstream_.erase(iter); - } - iter = FindFromVector(upstream_node->downstream_, fused_down_node); - if (iter != upstream_node->downstream_.end()) { - upstream_node->downstream_.erase(iter); - } - } - - for (const auto& downstream_node : downstream_) { - iter = FindFromVector(downstream_node->upstream_, fused_up_node); - if (iter != downstream_node->upstream_.end()) { - downstream_node->upstream_.erase(iter); - } - iter = FindFromVector(downstream_node->upstream_, fused_down_node); - if (iter != downstream_node->upstream_.end()) { - downstream_node->upstream_.erase(iter); - } - } -} - -std::vector PatternNode::GetOps() const { +std::vector PatternNode::GetOps() const { return GetOpsInPattern(stmt_pattern_); } bool PatternNode::IsTrivial() const { return IsTrivialPattern(stmt_pattern_); } +bool 
PatternNode::IsReduce() const { return IsReducePattern(stmt_pattern_); } +bool PatternNode::IsReduceTree() const { + return IsReduceTreePattern(stmt_pattern_); +} +bool PatternNode::IsUnsupport() const { + return IsUnsupportPattern(stmt_pattern_); +} +bool PatternNode::IsReduceTrivial() const { + return IsReduceTrivialPattern(stmt_pattern_); +} +std::string PatternNode::DebugStr() const { + std::stringstream ss; + ss << "Node: " << this << ", Pattern: " << GetPatternName(stmt_pattern_) + << "\n -u>: "; + for (const auto& u : upstream_) { + ss << u << ", "; + } + ss << "\n ; - explicit PatternNode(const pir::Operation* op); + explicit PatternNode(pir::Operation* op); explicit PatternNode(PatternNodePtr fused_up_node, PatternNodePtr fused_down_node); bool IsTrivial() const; - std::vector GetOps() const; + bool IsReduce() const; + bool IsReduceTree() const; + bool IsUnsupport() const; + bool IsReduceTrivial() const; + + std::vector GetOps() const; StmtPattern stmt_pattern_; - const pir::Operation* sink_op_; + pir::Operation* sink_op_; std::vector upstream_; std::vector downstream_; + + std::string DebugStr() const; }; using PatternNodePtr = PatternNode::PatternNodePtr; diff --git a/paddle/cinn/hlir/dialect/operator/ir/manual_op.cc b/paddle/cinn/hlir/dialect/operator/ir/manual_op.cc index 2dbe30c4447b7..40008b51a54f2 100644 --- a/paddle/cinn/hlir/dialect/operator/ir/manual_op.cc +++ b/paddle/cinn/hlir/dialect/operator/ir/manual_op.cc @@ -65,7 +65,6 @@ void GroupOp::Build(pir::Builder& builder, // NOLINT std::unique_ptr&& block) { VLOG(4) << "Start build GroupOp"; if (block && !block->empty()) { - // IR_ENFORCE(block->back().isa()); PADDLE_ENFORCE_EQ(block->back().isa(), true); auto& op = block->back(); for (size_t i = 0; i < op.num_operands(); ++i) { @@ -83,7 +82,10 @@ pir::Block* GroupOp::block() { pir::Block* GroupOp::block() const { pir::Region& region = (*this)->region(0); - CHECK(!region.empty()); + PADDLE_ENFORCE_EQ(region.empty(), + false, + 
::common::errors::Unavailable( + "Required GroupOp's region must not be emptpy.")); return ®ion.front(); } @@ -156,7 +158,16 @@ pir::Block* FusionOp::block() { return ®ion.front(); } -std::vector FusionOp::GetOperators() { +pir::Block* FusionOp::block() const { + pir::Region& region = (*this)->region(0); + PADDLE_ENFORCE_EQ(region.empty(), + false, + ::common::errors::Unavailable( + "Required FusionOp's region must not be emptpy.")); + return ®ion.front(); +} + +std::vector FusionOp::GetOperators() const { std::vector rt_ops; for (auto& op : *block()) { rt_ops.push_back(&op); @@ -305,7 +316,9 @@ void GenerateShapeOp::Build( if (inputs.empty()) { VLOG(3) << "GenerateShapeOp inputs is empty"; for (const auto& attr : output_dim_exprs) { - CHECK(attr.isa()); + PADDLE_ENFORCE(attr.isa(), + ::common::errors::PreconditionNotMet( + "Reqiured attr must be Int64Attribute.")); } } argument.AddInputs(inputs); @@ -467,11 +480,15 @@ bool GenerateShapeOp::InferSymbolicShape( const auto attr_dim_exprs = [&] { std::vector dim_exprs{}; pir::Attribute dim_expr_attr = this->attributes().at("output_dim_exprs"); - CHECK(dim_expr_attr.isa()); + PADDLE_ENFORCE(dim_expr_attr.isa(), + ::common::errors::PreconditionNotMet( + "Required dim_expr_attr is ArrayAttribute.")); auto array = dim_expr_attr.dyn_cast(); for (int i = 0; i < array.size(); ++i) { const auto& dim_expr = ConvertAttributeToDimExpr(array.at(i)); - CHECK(dim_expr.has_value()); + PADDLE_ENFORCE(dim_expr.has_value(), + ::common::errors::PreconditionNotMet( + "Required dim_expr.has_value()==true.")); dim_exprs.push_back(dim_expr.value()); } return dim_exprs; @@ -481,7 +498,9 @@ bool GenerateShapeOp::InferSymbolicShape( this->attributes().at("symbol_bindings"); auto symbol_bindings = ConvertAttributeToSymbolBindings(symbol_bindings_attr); - CHECK(symbol_bindings.has_value()); + PADDLE_ENFORCE(symbol_bindings.has_value(), + ::common::errors::PreconditionNotMet( + "Required symbol_bindings.has_value()==true.")); return 
symbol_bindings.value(); }(); auto DimExprs4InputDim = diff --git a/paddle/cinn/hlir/dialect/operator/ir/manual_op.h b/paddle/cinn/hlir/dialect/operator/ir/manual_op.h index f27908438d3b9..34c53ed2ebe6b 100644 --- a/paddle/cinn/hlir/dialect/operator/ir/manual_op.h +++ b/paddle/cinn/hlir/dialect/operator/ir/manual_op.h @@ -77,7 +77,8 @@ class IR_API FusionOp : public pir::Op { const cinn::dialect::GroupInfo &group_info); pir::Block *block(); - std::vector GetOperators(); + pir::Block *block() const; + std::vector GetOperators() const; void VerifySig(); diff --git a/paddle/cinn/hlir/dialect/operator/transforms/CMakeLists.txt b/paddle/cinn/hlir/dialect/operator/transforms/CMakeLists.txt index e329b8886f18b..2cadf976c79bd 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/CMakeLists.txt +++ b/paddle/cinn/hlir/dialect/operator/transforms/CMakeLists.txt @@ -7,7 +7,7 @@ set(cinn_transforms_deps cinn_op_dialect op_dialect_vjp cinn_runtime_dialect - # group_cluster + group_cluster pir_compiler) cinn_cc_library(cinn_transforms SRCS ${cinn_transforms_srcs} DEPS @@ -16,4 +16,4 @@ cinn_cc_library(cinn_transforms SRCS ${cinn_transforms_srcs} DEPS cc_library( add_cinn_pass SRCS add_cinn_pass.cc - DEPS op_dialect pir cinn_op_dialect cinnapi pir_transforms cinn_transforms) + DEPS pir_transforms cinn_transforms) diff --git a/paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.cc index 3b6b1adcdbda1..80f929edd8f4d 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.cc @@ -29,6 +29,7 @@ #include "paddle/cinn/hlir/dialect/operator/transforms/add_store_in_fusion_op_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/cinn_group_cluster_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/dynamic_reshape_pass.h" +#include "paddle/cinn/hlir/dialect/operator/transforms/fold_manipulation_ops_pass.h" #include 
"paddle/cinn/hlir/dialect/operator/transforms/fuse_shape_ops_into_generate_shape_op_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/group_merge/check_infer_symbolic_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_0d_to_1d_pass.h" @@ -42,7 +43,6 @@ #include "paddle/cinn/hlir/dialect/operator/transforms/insert_broadcast_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/lower_cinn_fusion_op_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/pd_to_cinn_pass.h" -#include "paddle/cinn/hlir/dialect/operator/transforms/remove_unchanged_reshape_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/replace_dynamic_expand_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/split_generate_shape_into_shape_ops_pass.h" #include "paddle/fluid/pir/transforms/build_cinn_pass.h" @@ -118,7 +118,7 @@ void ApplyBuildGroupOpPass( if (has_dynamic_shape) { pass_manager->AddPass(pir::CreateShapeOptimizationPass()); } - pass_manager->AddPass(cinn::dialect::ir::CreateRemoveUnchangedReshapePass()); + pass_manager->AddPass(cinn::dialect::ir::CreateFoldManipulationOpsPass()); pass_manager->AddPass(pir::CreateBuildCinnPass()); @@ -145,7 +145,7 @@ void ApplyGroupOpPass(::pir::Program* program, pass_manager->AddPass(cinn::dialect::ir::CreateDynamicReshapeOpPass()); pass_manager->AddPass(pir::CreateDeadCodeEliminationPass()); - pass_manager->AddPass(cinn::dialect::ir::CreateRemoveUnchangedReshapePass()); + pass_manager->AddPass(cinn::dialect::ir::CreateFoldManipulationOpsPass()); pass_manager->Run(program); } diff --git a/paddle/cinn/hlir/dialect/operator/transforms/add_store_in_fusion_op_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/add_store_in_fusion_op_pass.cc index e0c52169df0a6..d66943dfc8bf9 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/add_store_in_fusion_op_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/add_store_in_fusion_op_pass.cc @@ -39,6 +39,7 
@@ class AddYieldStoreInFusionOpPattern continue; } + rewriter.SetInsertionPointAfter(op->operand_source(i).defining_op()); auto store_op = rewriter.Build( op->operand_source(i), op->operand_source(i).type()); auto orignal_base = op->operand_source(i); diff --git a/paddle/cinn/hlir/dialect/operator/transforms/cinn_group_cluster_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/cinn_group_cluster_pass.cc index 2b8926bca6e60..9fd5a721ac825 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/cinn_group_cluster_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/cinn_group_cluster_pass.cc @@ -28,6 +28,7 @@ #include "paddle/cinn/hlir/dialect/operator/transforms/cinn_group_cluster_pass.h" +#include "paddle/cinn/frontend/group_cluster/group_cluster.h" #include "paddle/cinn/hlir/dialect/operator/ir/attribute_storage.h" #include "paddle/cinn/hlir/dialect/operator/ir/cinn_op.h" #include "paddle/cinn/hlir/dialect/operator/ir/manual_op.h" @@ -48,8 +49,7 @@ #include "paddle/pir/include/pattern_rewrite/pattern_match.h" #include "paddle/pir/include/pattern_rewrite/pattern_rewrite_driver.h" -// #include "paddle/cinn/frontend/group_cluster/group_cluster.h" -// PD_DECLARE_bool(cinn_new_cluster_op_method); +PD_DECLARE_bool(cinn_new_cluster_op_method); namespace cinn { namespace dialect { @@ -249,7 +249,6 @@ std::vector<::pir::Value> GenerateOutputValue( if (outside_need_value.count(op->result(i))) { if (!inserted_val.count(op->result(i))) { temp_out.push_back(op->result(i)); - inserted_val.insert(op->result(i)); } } @@ -835,30 +834,35 @@ std::vector NodeMergeWithNode( return second_stage_output; } -// std::vector NewOpMergeWithOp( -// cinn::dialect::GroupOp group_op) { -// const auto cluster_result = frontend::ClusterOps(group_op); - -// // Each stmts corresponds to each fusion op(cluster node). -// // Concat all the ops of patterns in the stmts, and make them the op list -// of -// // cluster node. 
-// VLOG(4) << "Start Creating Cluster Nodes!"; -// std::vector output_cluster_nodes; -// for (const auto& op_set : cluster_result) { -// GroupClusterNode cluster_node; -// for (const auto* op : op_set) { -// cluster_node.ops.push_back(const_cast(op)); -// auto op_kind = cinn::hlir::framework::pir::CompatibleInfo::OpKind(*op); -// cluster_node.group_kind = -// cluster_node.group_kind > op_kind ? cluster_node.group_kind : -// op_kind; -// } -// output_cluster_nodes.push_back(cluster_node); -// } -// VLOG(4) << "Finished Creating Cluster Nodes!"; -// return output_cluster_nodes; -// } +std::vector NewOpMergeWithOp( + cinn::dialect::GroupOp group_op) { + auto cluster_result = frontend::ClusterOps(group_op.GetOperators(), true); + std::vector> result; + std::transform(cluster_result.begin(), + cluster_result.end(), + std::back_inserter(result), + [](const frontend::group_cluster::PatternNodePtr node) { + return node->GetOps(); + }); + + // Each stmts corresponds to each fusion op(cluster node). + // Concat all the ops of patterns in the stmts, and make them the op list of + // cluster node. + VLOG(4) << "Start Creating Cluster Nodes!"; + std::vector output_cluster_nodes; + for (const auto& op_set : result) { + GroupClusterNode cluster_node; + for (const auto* op : op_set) { + cluster_node.ops.push_back(const_cast(op)); + auto op_kind = cinn::hlir::framework::pir::CompatibleInfo::OpKind(*op); + cluster_node.group_kind = + cluster_node.group_kind > op_kind ? 
cluster_node.group_kind : op_kind; + } + output_cluster_nodes.push_back(cluster_node); + } + VLOG(4) << "Finished Creating Cluster Nodes!"; + return output_cluster_nodes; +} std::vector OpMergeWithOp(cinn::dialect::GroupOp group_op) { // op merge with op @@ -926,9 +930,9 @@ std::vector OpMergeWithOp(cinn::dialect::GroupOp group_op) { std::vector GroupSplit(cinn::dialect::GroupOp group_op) { // stage 1 - // if (FLAGS_cinn_new_cluster_op_method) { - // return NewOpMergeWithOp(group_op); - // } + if (FLAGS_cinn_new_cluster_op_method) { + return NewOpMergeWithOp(group_op); + } auto first_stage_output = OpMergeWithOp(group_op); @@ -1044,14 +1048,12 @@ class CinnGroupClusterPattern // update ir mapping for (size_t i = 0; i < output_values.size(); ++i) { ir_mapping.Add(output_values[i], new_group_op->result(i)); - if (shape_analysis.HasShapeOrDataForValue(output_values[i])) { shape_analysis.SetShapeOrDataForValue( new_group_op->result(i), shape_analysis.GetShapeOrDataForValue(output_values[i])); } } - for (size_t i = 0; i < output_values.size(); ++i) { auto find_it = all_output_values.find(output_values[i]); if ((find_it != all_output_values.end()) && @@ -1062,6 +1064,7 @@ class CinnGroupClusterPattern } } } + rewriter.EraseOp(group_op); return true; diff --git a/paddle/cinn/hlir/dialect/operator/transforms/remove_unchanged_reshape_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/fold_manipulation_ops_pass.cc similarity index 69% rename from paddle/cinn/hlir/dialect/operator/transforms/remove_unchanged_reshape_pass.cc rename to paddle/cinn/hlir/dialect/operator/transforms/fold_manipulation_ops_pass.cc index a2c09cc14a8dc..bbd79947314d2 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/remove_unchanged_reshape_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/fold_manipulation_ops_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/cinn/hlir/dialect/operator/transforms/remove_unchanged_reshape_pass.h" +#include "paddle/cinn/hlir/dialect/operator/transforms/fold_manipulation_ops_pass.h" #include "paddle/cinn/hlir/dialect/operator/ir/cinn_op.h" #include "paddle/cinn/hlir/dialect/operator/transforms/group_merge/op_with_group_merge_util.h" @@ -75,7 +75,7 @@ bool RemoveOp(pir::Operation* op, pir::PatternRewriter* rewriter) { } template -class RemoveUnchangedReshapePattern : public pir::OpRewritePattern { +class RemoveUnchangedOpPattern : public pir::OpRewritePattern { public: using pir::OpRewritePattern::OpRewritePattern; @@ -85,18 +85,19 @@ class RemoveUnchangedReshapePattern : public pir::OpRewritePattern { } }; -class MergeReshapePattern - : public pir::OpRewritePattern { +template +class MergeRedundantOpPattern : public pir::OpRewritePattern { public: - using pir::OpRewritePattern::OpRewritePattern; + using pir::OpRewritePattern::OpRewritePattern; - bool MatchAndRewrite(cinn::dialect::ReshapeOp op, + bool MatchAndRewrite(OPTYPE op, pir::PatternRewriter& rewriter) const override { - if (auto pre_shape = op->operand_source(0) - .defining_op() - ->dyn_cast()) { - op->operand(0).set_source(pre_shape->operand_source(0)); - + if (auto pre_op = (op->operand_source(0).defining_op()) + ->template dyn_cast()) { + op->operand(0).set_source(pre_op->operand_source(0)); + if (pre_op->use_empty()) { + rewriter.EraseOp(pre_op); + } return true; } @@ -104,18 +105,24 @@ class MergeReshapePattern } }; -class RemoveUnchangedReshapePass : public pir::PatternRewritePass { +class FoldManipulationOpsPass : public pir::PatternRewritePass { public: - RemoveUnchangedReshapePass() - : pir::PatternRewritePass("remove_unchanged_reshape_pass", 1) {} + FoldManipulationOpsPass() + : pir::PatternRewritePass("fold_manipulation_ops_pass", 1) {} pir::RewritePatternSet InitializePatterns(pir::IrContext* context) override { pir::RewritePatternSet ps(context); - // remove out_shape equal in_shape reshape op - 
ps.Add>(context); - ps.Add>(context); - ps.Add(context); + // remove out_shape equal in_shape ops + ps.Add>(context); + ps.Add>(context); + ps.Add>(context); + ps.Add>(context); + // merge redundant ops + ps.Add>(context); + ps.Add>(context); + ps.Add>(context); + ps.Add>(context); ps.Add(context); return ps; @@ -126,13 +133,12 @@ class RemoveUnchangedReshapePass : public pir::PatternRewritePass { } }; -std::unique_ptr CreateRemoveUnchangedReshapePass() { - return std::make_unique(); +std::unique_ptr CreateFoldManipulationOpsPass() { + return std::make_unique(); } - } // namespace ir } // namespace dialect } // namespace cinn -REGISTER_IR_PASS(remove_unchanged_reshape_pass, - ::cinn::dialect::ir::RemoveUnchangedReshapePass); +REGISTER_IR_PASS(fold_manipulation_ops_pass, + ::cinn::dialect::ir::FoldManipulationOpsPass); diff --git a/paddle/cinn/hlir/dialect/operator/transforms/remove_unchanged_reshape_pass.h b/paddle/cinn/hlir/dialect/operator/transforms/fold_manipulation_ops_pass.h similarity index 93% rename from paddle/cinn/hlir/dialect/operator/transforms/remove_unchanged_reshape_pass.h rename to paddle/cinn/hlir/dialect/operator/transforms/fold_manipulation_ops_pass.h index ef75306748af2..239ba863389f7 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/remove_unchanged_reshape_pass.h +++ b/paddle/cinn/hlir/dialect/operator/transforms/fold_manipulation_ops_pass.h @@ -21,7 +21,7 @@ namespace cinn { namespace dialect { namespace ir { -std::unique_ptr CreateRemoveUnchangedReshapePass(); +std::unique_ptr CreateFoldManipulationOpsPass(); } // namespace ir } // namespace dialect } // namespace cinn diff --git a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_dynamic_to_static_dim_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_dynamic_to_static_dim_pass.cc index d1550a2bdf257..72219287fe3e3 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_dynamic_to_static_dim_pass.cc +++ 
b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_dynamic_to_static_dim_pass.cc @@ -181,10 +181,23 @@ class DynamicToStaticConverter { CHECK(shape_analysis_->HasShapeOrDataForValue(value)); const auto& origin_shape = GetOriginValueShape(value); const auto& target_shape = GetTargetValueShape(value); - CHECK_EQ(origin_shape.size(), target_shape.size()); + PADDLE_ENFORCE_EQ( + origin_shape.size(), + target_shape.size(), + phi::errors::InvalidArgument( + "The size of origin shape and target shape is not equal," + "where the size of origin shape:%d but the size of target " + "shape:%d.", + origin_shape.size(), + target_shape.size())); for (std::size_t i = 0; i < origin_shape.size(); ++i) { if (origin_shape.at(i) == -1) { - CHECK_GT(target_shape.at(i), 0); + PADDLE_ENFORCE_GT(target_shape.at(i), + 0, + phi::errors::InvalidArgument( + "The size of target shape is incorrect." + "Expected size is larger than 0, but receive %d.", + target_shape.at(i))); update = true; } else { CHECK(origin_shape.at(i) == target_shape.at(i)); diff --git a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_static_dim_to_dynamic_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_static_dim_to_dynamic_pass.cc index e67cb5aacabfa..e20cab270cdd3 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_static_dim_to_dynamic_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_static_dim_to_dynamic_pass.cc @@ -154,7 +154,15 @@ struct StaticDimToDynamicConverter { const auto& origin_shape = GetOriginValueShape(value); const auto& target_shape = GetTargetValueShape( shape_analysis->GetShapeOrDataForValue(value).shape()); - CHECK_EQ(origin_shape.size(), target_shape.size()); + PADDLE_ENFORCE_EQ( + origin_shape.size(), + target_shape.size(), + phi::errors::InvalidArgument( + "The size of origin shape and target shape is not equal," + "where the size of origin shape:%d but the size of target " + 
"shape:%d.", + origin_shape.size(), + target_shape.size())); const auto& origin_type = value.type().dyn_cast<::pir::DenseTensorType>(); pir::DenseTensorType target_type = pir::DenseTensorType::get(pir::IrContext::Instance(), diff --git a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_pass.cc index 79b8a70d28acc..1b0519938c933 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_pass.cc @@ -1941,7 +1941,14 @@ class GeneralFusionMergePassHelper { } } - CHECK_GE(producer->consumer_groups().size(), candidates.size()); + PADDLE_ENFORCE_GE( + producer->consumer_groups().size(), + candidates.size(), + phi::errors::InvalidArgument( + "The size of producer consumer groups is incorrect." + "Expected size is greater than or equal to %d, but receive %d.", + candidates.size(), + producer->consumer_groups().size())); if (producer->consumer_groups().size() == 0 && candidates.size() == 0 && output_ops_set_.count(producer->CollectOps()[0]) == 0) { producer->belong_groups.insert(*fusionable_consumers->begin()); @@ -2204,8 +2211,24 @@ class GeneralFusionMergePassHelper { CHECK(consumer->belong_groups.size()); consumers.insert(*consumer->belong_groups.begin()); } - CHECK_EQ(group->producer_groups().size(), producers.size()); - CHECK_EQ(group->consumer_groups().size(), consumers.size()); + PADDLE_ENFORCE_EQ( + group->producer_groups().size(), + producers.size(), + phi::errors::InvalidArgument( + "The size of group's producer groups and producers is not equal," + "where the size of group's producer groups:%d but the size of " + "producers:%d.", + group->producer_groups().size(), + producers.size())); + PADDLE_ENFORCE_EQ( + group->consumer_groups().size(), + consumers.size(), + phi::errors::InvalidArgument( + "The size of group's 
consumer groups and consumers is not equal," + "where the size of group's consumer groups:%d but the size of " + "consumers:%d.", + group->consumer_groups().size(), + consumers.size())); (*group->mut_producer_groups()) = producers; (*group->mut_consumer_groups()) = consumers; } diff --git a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/substitute_dim_expr_based_on_constraints_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/substitute_dim_expr_based_on_constraints_pass.cc index 97570459eebc1..a9c9a6a68111c 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/substitute_dim_expr_based_on_constraints_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/substitute_dim_expr_based_on_constraints_pass.cc @@ -49,72 +49,6 @@ void VisitEachValue(const pir::Operation* op, const DoEachT& DoEach) { } } -symbol::TensorShapeOrDataDimExprs SubstituteTensorShapeOrData( - const symbol::TensorShapeOrDataDimExprs& shape_or_data, - const std::unordered_map& - substitution_pattern) { - auto SubstituteOneDimExpr = - [](const std::vector& original_dim_expr, - const std::unordered_map& - substitution_pattern) -> std::vector { - std::vector substituted_dim_expr{}; - for (const symbol::DimExpr& dim_expr : original_dim_expr) { - const auto& tmp_dim_expr = - symbol::SubstituteDimExpr(dim_expr, substitution_pattern); - substituted_dim_expr.push_back(symbol::SimplifyDimExpr(tmp_dim_expr)); - } - return substituted_dim_expr; - }; - - std::vector substituted_shape = - SubstituteOneDimExpr(shape_or_data.shape(), substitution_pattern); - if (!shape_or_data.data().has_value()) { - return symbol::ShapeOrData(substituted_shape); - } else { - std::vector substituted_data = SubstituteOneDimExpr( - shape_or_data.data().value(), substitution_pattern); - return symbol::ShapeOrData(substituted_shape, - substituted_data); - } -} - -symbol::ShapeOrDataDimExprs SubstituteShapeOrData( - const symbol::ShapeOrDataDimExprs& shape_or_data, - const 
std::unordered_map& - substitution_pattern) { - auto lambdas = symbol::Overloaded{ - [&](const symbol::TensorShapeOrDataDimExprs& tensor_shape_or_data) { - return symbol::ShapeOrDataDimExprs(SubstituteTensorShapeOrData( - tensor_shape_or_data, substitution_pattern)); - }, - [&](const symbol::TensorListShapeOrDataDimExprs& tensor_list) { - symbol::TensorListShapeOrDataDimExprs substituted_tensor_list; - for (symbol::TensorShapeOrDataDimExprs tensor_shape_or_data : - tensor_list) { - substituted_tensor_list.push_back(SubstituteTensorShapeOrData( - tensor_shape_or_data, substitution_pattern)); - } - return symbol::ShapeOrDataDimExprs(substituted_tensor_list); - }}; - return std::visit(lambdas, shape_or_data.variant()); -} - -int GetDimExprPriority(const symbol::DimExpr& dim_expr) { - return std::visit( - symbol::Overloaded{ - [&](std::int64_t) { return 0; }, - [&](const std::string&) { return 1; }, - [&](const symbol::Negative&) { return 2; }, - [&](const symbol::Reciprocal&) { return 2; }, - [&](const symbol::Add&) { return 2; }, - [&](const symbol::Mul&) { return 2; }, - [&](const symbol::Max&) { return 2; }, - [&](const symbol::Min&) { return 2; }, - [&](const symbol::Broadcast&) { return 2; }, - }, - dim_expr.variant()); -} - std::unordered_map GetDimExprSubstitution( pir::ShapeConstraintIRAnalysis* shape_analysis) { const std::vector& dim_expr_constraints = @@ -139,7 +73,8 @@ std::unordered_map GetDimExprSubstitution( CHECK(!dim_expr_cluster.empty()); auto dim_expr_root = dim_expr_cluster[0]; for (const auto& dim_expr : dim_expr_cluster) { - if (GetDimExprPriority(dim_expr) < GetDimExprPriority(dim_expr_root)) { + if (symbol::GetDimExprPriority(dim_expr) < + symbol::GetDimExprPriority(dim_expr_root)) { dim_expr_root = dim_expr; } } @@ -170,7 +105,8 @@ void SubstituteDimExprBasedOnConstraints(pir::Operation* region_op) { VLOG(8) << op->name() << " origin_shape_or_data: " << origin_shape_or_data; const symbol::ShapeOrDataDimExprs& substituted_shape_or_data = - 
SubstituteShapeOrData(origin_shape_or_data, substitution_pattern); + symbol::SubstituteShapeOrData(origin_shape_or_data, + substitution_pattern); VLOG(8) << op->name() << " substituted_shape_or_data: " << substituted_shape_or_data; shape_analysis->SetShapeOrDataForValue(value, diff --git a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/broadcast_with_cf.cc b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/broadcast_with_cf.cc index 7068221d77fe5..22917b41d5b1c 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/broadcast_with_cf.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/broadcast_with_cf.cc @@ -108,11 +108,12 @@ void UpdateGroupShapeExprs( const auto& origin_shape_or_data = origin_group->GetShapeOrDataExprs(origin_val); if (origin_shape_or_data.data()) { + std::vector shape_dim_expr_shape = { + symbol::DimExpr(static_cast(shape_dim_expr.size()))}; new_group->SetShapeOrDataExprs( new_val, symbol::ShapeOrDataDimExprs{symbol::TensorShapeOrDataDimExprs( - std::vector{shape_dim_expr.size()}, - shape_dim_expr)}); + shape_dim_expr_shape, shape_dim_expr)}); } else { new_group->SetShapeOrDataExprs( new_val, @@ -134,7 +135,9 @@ bool EraseOneExpand( if (!SameInputOutputShape(expand, ShapeOrDataDimExprs4Value)) continue; auto generate_shape_op = expand.shape().defining_op(); - CHECK_NOTNULL(generate_shape_op); + PADDLE_ENFORCE_NOT_NULL(generate_shape_op, + phi::errors::PreconditionNotMet( + "The generate shape op must not be null.")); rewriter.ReplaceAllUsesWith(expand.out(), expand.x()); rewriter.EraseOp(expand); if (generate_shape_op->use_empty()) { @@ -280,7 +283,15 @@ void SetLeafBlockByGroupView( } auto new_group = CloneGroup(origin_group, block, &ir_mapping); - CHECK_EQ(origin_group->ops().size(), new_group->ops().size()); + PADDLE_ENFORCE_EQ( + origin_group->ops().size(), + new_group->ops().size(), + phi::errors::InvalidArgument( + "The size of origin group ops and new group ops is not equal," + "where 
the size of origin group ops:%d but the size of new group " + "ops:%d.", + origin_group->ops().size(), + new_group->ops().size())); UpdateGroupShapeExprs(new_group, origin_group, ir_mapping, diff --git a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/collect_sym_expr.cc b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/collect_sym_expr.cc index 4ef8a486f21e0..7526ad1ab6309 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/collect_sym_expr.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/collect_sym_expr.cc @@ -136,58 +136,13 @@ bool IsShapeOrDataNeedSubstitute( return ret; } -symbol::TensorShapeOrDataDimExprs SubstituteTensorShapeOrData( - const symbol::TensorShapeOrDataDimExprs& shape_or_data, - const std::unordered_map& dim_expr_map) { - const auto& SimplifyDimExpr = - [&](const std::vector& original_dim_expr) - -> std::vector { - std::vector simplified_dim_expr{}; - for (const symbol::DimExpr& dim_expr : original_dim_expr) { - simplified_dim_expr.push_back(symbol::SimplifyDimExpr( - symbol::SubstituteDimExpr(dim_expr, dim_expr_map))); - } - return simplified_dim_expr; - }; - - std::vector simplified_shape = - SimplifyDimExpr(shape_or_data.shape()); - if (!shape_or_data.data().has_value()) { - return symbol::ShapeOrData(simplified_shape); - } - std::vector simplified_data = - SimplifyDimExpr(shape_or_data.data().value()); - return symbol::ShapeOrData(simplified_shape, - simplified_data); -} - -symbol::ShapeOrDataDimExprs SubstituteShapeOrData( - const symbol::ShapeOrDataDimExprs& shape_or_data, - const std::unordered_map& dim_expr_map) { - auto lambdas = symbol::Overloaded{ - [&](const symbol::TensorShapeOrDataDimExprs& tensor_shape_or_data) { - return symbol::ShapeOrDataDimExprs( - SubstituteTensorShapeOrData(tensor_shape_or_data, dim_expr_map)); - }, - [&](const symbol::TensorListShapeOrDataDimExprs& tensor_list) { - symbol::TensorListShapeOrDataDimExprs simplified_tensor_list; - for 
(symbol::TensorShapeOrDataDimExprs tensor_shape_or_data : - tensor_list) { - simplified_tensor_list.push_back( - SubstituteTensorShapeOrData(tensor_shape_or_data, dim_expr_map)); - } - return symbol::ShapeOrDataDimExprs(simplified_tensor_list); - }}; - return std::visit(lambdas, shape_or_data.variant()); -} - symbol::ShapeOrDataDimExprs TrySubstitute( const symbol::ShapeOrDataDimExprs& shape_or_data, const std::unordered_map& dim_expr_map) { if (!IsShapeOrDataNeedSubstitute(shape_or_data, dim_expr_map)) { return shape_or_data; } - return SubstituteShapeOrData(shape_or_data, dim_expr_map); + return symbol::SubstituteShapeOrData(shape_or_data, dim_expr_map); } void InferSymbolicShapeForOperation( diff --git a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/lower_cinn_fusion_op_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/lower_cinn_fusion_op_pass.cc index 0e7ebb8e9499d..3fa26f51b5592 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/lower_cinn_fusion_op_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/lower_cinn_fusion_op_pass.cc @@ -34,6 +34,9 @@ pir::Operation* ProcessDyShapeGroup( const OpLoweringGroupPtr& group, pir::ShapeConstraintIRAnalysis& shape_analysis, // NOLINT pir::PatternRewriter& rewriter) { // NOLINT + // NOTE(dev): Need UpdateShapeOrDataExprs firstly and the logic + // will be migated into BucketLower later. 
+ UpdateGroupShapeOrDataExprs(const_cast(group)); auto group_inputs = GetBlockOutsideInput(group->ops()); GroupDimExprInfo group_dim_expr_info = GetGroupDimExprInfo(group); const auto& leaves = group_dim_expr_info.all_value_dim_exprs; diff --git a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.cc b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.cc index e4724c617dfaf..29c127b42d10d 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.cc @@ -78,7 +78,8 @@ CompileGroupAsOpAttribute(const std::vector& group_list) { std::unordered_map GetJitKernelAttr( const OpLoweringGroupPtr& group) { - auto kernel_info = CompilationCache::Instance().GetKernelInfo(group); + hlir::framework::pir::FusionInfo fusion_info(*group); + auto kernel_info = CompilationCache::Instance().GetKernelInfo(fusion_info); std::unordered_map attrs{ {cinn::dialect::JitKernelOp::kAttrName, cinn::dialect::CINNKernelInfoAttribute::get(pir::IrContext::Instance(), @@ -88,33 +89,36 @@ std::unordered_map GetJitKernelAttr( OpLoweringGroupPtr BuildOpLoweringGroup(pir::Operation* fusion_op_ptr) { auto fusion_op = fusion_op_ptr->dyn_cast(); - auto group = std::make_shared(); - group->set_op_pattern_kind( - cinn::hlir::framework::OpPatternKind::kElementWise); + std::vector<::pir::Operation*> ops; + auto group_op_kind = cinn::hlir::framework::OpPatternKind::kElementWise; + // Rebuild ops of the group + for (auto op : fusion_op.GetOperators()) { + if (!op->isa<::pir::YieldOp>()) { + ops.push_back(op); + group_op_kind = static_cast(CompatibleInfo::OpKind(*op)) > + static_cast(group_op_kind) + ? 
CompatibleInfo::OpKind(*op) + : group_op_kind; + } + } + + auto group = std::make_shared(ops); + if (fusion_op.attributes().count("group_info")) { auto attr = fusion_op.attribute("group_info") .dyn_cast() .data(); - group->set_op_pattern_kind(attr.op_pattern_kind); + group_op_kind = + static_cast(attr.op_pattern_kind) > static_cast(group_op_kind) + ? attr.op_pattern_kind + : group_op_kind; group->set_loop_ranges(attr.loop_ranges); group->set_loop_ranges_expr(attr.loop_ranges_expr); - group->set_reduce_axis(attr.reduce_axis); group->set_alignment_schedule_info(attr.alignment_schedule_info); } - - // Rebuild ops of the group - for (auto op : fusion_op.GetOperators()) { - if (!op->isa<::pir::YieldOp>()) { - group->mut_ops().push_back(op); - auto op_pattern_kind = static_cast(CompatibleInfo::OpKind(*op)) > - static_cast(group->op_pattern_kind()) - ? CompatibleInfo::OpKind(*op) - : group->op_pattern_kind(); - group->set_op_pattern_kind(op_pattern_kind); - } - } + group->set_op_pattern_kind(group_op_kind); // Rebuild output_ops and input_ops of the group auto yield_op = fusion_op.GetOperators().back(); @@ -127,10 +131,7 @@ OpLoweringGroupPtr BuildOpLoweringGroup(pir::Operation* fusion_op_ptr) { // Because the group is rebuilt, the order of group.output_values generated // by BuildCUDAJITInfo may not be same with the order bound in the yield op, // so a mapping is required. 
- auto& shape_analysis = - pir::ShapeAnalysisManager::Instance().Get(fusion_op->GetParentProgram()); - group->set_value_to_shape_or_data_exprs( - CreateGroupShapeOrDataExprs(group, shape_analysis)); + UpdateGroupShapeOrDataExprs(group); if (FLAGS_cinn_enable_map_expr) { cinn::adt::TryGenerateMapExprFromGroup(group); } @@ -139,4 +140,11 @@ OpLoweringGroupPtr BuildOpLoweringGroup(pir::Operation* fusion_op_ptr) { return group; } +void UpdateGroupShapeOrDataExprs(OpLoweringGroupPtr group) { + auto& shape_analysis = + pir::ShapeAnalysisManager::Instance().Get(group->GetParentProgram()); + group->set_value_to_shape_or_data_exprs( + CreateGroupShapeOrDataExprs(group, shape_analysis)); +} + } // namespace cinn::dialect::ir::details diff --git a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.h b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.h index 3b3ba4379d57c..5c5d0c104390a 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.h +++ b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.h @@ -31,4 +31,6 @@ std::unordered_map GetJitKernelAttr( OpLoweringGroupPtr BuildOpLoweringGroup(pir::Operation* fusion_op_ptr); +void UpdateGroupShapeOrDataExprs(OpLoweringGroupPtr group); + } // namespace cinn::dialect::ir::details diff --git a/paddle/cinn/hlir/dialect/operator/transforms/pd_to_cinn_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/pd_to_cinn_pass.cc index 3bf32aa91837d..be57629fe8747 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/pd_to_cinn_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/pd_to_cinn_pass.cc @@ -761,8 +761,8 @@ class FullWithTensorOpPattern bool MatchAndRewrite(paddle::dialect::FullWithTensorOp op, pir::PatternRewriter &rewriter) const override { - auto shape = op->operand_source(0); - auto value = op->operand_source(1); + auto value = op->operand_source(0); + auto shape = op->operand_source(1); if (paddle::dialect::TransToPhiDataType( value.type() diff 
--git a/paddle/cinn/hlir/dialect/operator/transforms/split_generate_shape_into_shape_ops_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/split_generate_shape_into_shape_ops_pass.cc index 19e7f5060eb96..696449b471b3d 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/split_generate_shape_into_shape_ops_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/split_generate_shape_into_shape_ops_pass.cc @@ -143,7 +143,12 @@ struct CachedDimExprToValueConverter { pir::Value ConvertToValueImpl(const symbol::Add& dim_expr) { const auto& [operands] = dim_expr; - CHECK_GT(operands->size(), 0); + PADDLE_ENFORCE_GT(operands->size(), + 0, + phi::errors::InvalidArgument( + "The size of operands is incorrect." + "Expected size is larger than 0, but receive %d.", + operands->size())); pir::Value acc = ConvertToValue(operands->at(0)); for (int i = 1; i < operands->size(); ++i) { if (operands->at(i).isa>()) { @@ -162,7 +167,12 @@ struct CachedDimExprToValueConverter { pir::Value ConvertToValueImpl(const symbol::Mul& dim_expr) { const auto& [operands] = dim_expr; - CHECK_GT(operands->size(), 0); + PADDLE_ENFORCE_GT(operands->size(), + 0, + phi::errors::InvalidArgument( + "The size of operands is incorrect." + "Expected size is larger than 0, but receive %d.", + operands->size())); pir::Value prod = ConvertToValue(operands->at(0)); for (int i = 1; i < operands->size(); ++i) { if (operands->at(i).isa>()) { @@ -182,7 +192,12 @@ struct CachedDimExprToValueConverter { pir::Value ConvertToValueImpl(const symbol::Max& dim_expr) { const auto& [operands] = dim_expr; - CHECK_GT(operands->size(), 0); + PADDLE_ENFORCE_GT(operands->size(), + 0, + phi::errors::InvalidArgument( + "The size of operands is incorrect." 
+ "Expected size is larger than 0, but receive %d.", + operands->size())); pir::Value max = ConvertToValue(operands->at(0)); for (int i = 1; i < operands->size(); ++i) { pir::Value operand_value = ConvertToValue(operands->at(i)); @@ -193,7 +208,12 @@ struct CachedDimExprToValueConverter { pir::Value ConvertToValueImpl(const symbol::Min& dim_expr) { const auto& [operands] = dim_expr; - CHECK_GT(operands->size(), 0); + PADDLE_ENFORCE_GT(operands->size(), + 0, + phi::errors::InvalidArgument( + "The size of operands is incorrect." + "Expected size is larger than 0, but receive %d.", + operands->size())); pir::Value min = ConvertToValue(operands->at(0)); for (int i = 1; i < operands->size(); ++i) { pir::Value operand_value = ConvertToValue(operands->at(i)); @@ -205,7 +225,12 @@ struct CachedDimExprToValueConverter { pir::Value ConvertToValueImpl( const symbol::Broadcast& dim_expr) { const auto& [operands] = dim_expr; - CHECK_GT(operands->size(), 0); + PADDLE_ENFORCE_GT(operands->size(), + 0, + phi::errors::InvalidArgument( + "The size of operands is incorrect." 
+ "Expected size is larger than 0, but receive %d.", + operands->size())); pir::Value broadcasted = ConvertToValue(operands->at(0)); for (int i = 1; i < operands->size(); ++i) { pir::Value operand_value = ConvertToValue(operands->at(i)); diff --git a/paddle/cinn/hlir/framework/pir/CMakeLists.txt b/paddle/cinn/hlir/framework/pir/CMakeLists.txt index 3b09925b94830..bf8cd25f48e4b 100755 --- a/paddle/cinn/hlir/framework/pir/CMakeLists.txt +++ b/paddle/cinn/hlir/framework/pir/CMakeLists.txt @@ -8,7 +8,8 @@ gather_srcs( op_lowering_impl.cc op_mapper.cc op_lowering_util.cc + trivial_op_impl.cc + trivial_op_util.cc compilation_task.cc compilation_cache.cc - trivial_op_impl.cc - trivial_op_util.cc) + fusion_info.cc) diff --git a/paddle/cinn/hlir/framework/pir/compilation_cache.cc b/paddle/cinn/hlir/framework/pir/compilation_cache.cc index 47a38442b58a5..9b98597a50265 100644 --- a/paddle/cinn/hlir/framework/pir/compilation_cache.cc +++ b/paddle/cinn/hlir/framework/pir/compilation_cache.cc @@ -39,38 +39,20 @@ void* BackendResource::GetInferFuncPtr() const { return ptr; } -std::shared_ptr& BackendResource::GetBackendCompiler() { - return backend_compiler_; -} - -const std::shared_ptr& BackendResource::GetBackendCompiler() - const { - return backend_compiler_; -} - -void BackendResource::SetHostFnName(const std::string& name) { - host_fn_name_ = name; -} - -void BackendResource::SetInferFnName(const std::string& name) { - infer_fn_name_ = name; -} - -pir::CINNKernelInfo BackendResource::GernerateKernelInfo( - const std::shared_ptr& group) const { +pir::CINNKernelInfo BackendResource::GenerateKernelInfo() const { pir::CINNKernelInfo kernel_info; kernel_info.fn_name = host_fn_name_; kernel_info.fn_ptr = GetHostFuncPtr(); kernel_info.infer_shape_fn_ptr = GetInferFuncPtr(); - kernel_info.int_args_map = group->int_args_map(); + kernel_info.int_args_map = GetIntArgsMap(); return kernel_info; } } // namespace pir bool CompilationCache::Has(const CacheKey& key) const { - const bool 
has_existed = cache_.find(KeyHash(key)) != cache_.end(); - VLOG(6) << "Check IsExisted in CompilationCache: " << key->FuncName() << " " - << has_existed; + const bool has_existed = cache_.find(key) != cache_.end(); + VLOG(6) << "Check IsExisted in CompilationCache: " << has_existed << " - " + << key; return has_existed; } @@ -79,24 +61,19 @@ const CompilationCache::CacheValue& CompilationCache::Get( PADDLE_ENFORCE_EQ( Has(key), true, - phi::errors::NotFound("%s is not in CompliatonCache.", key->FuncName())); - return cache_.at(KeyHash(key)); + phi::errors::NotFound("%s is not in CompliatonCache.", key)); + return cache_.at(key); } pir::CINNKernelInfo CompilationCache::GetKernelInfo(const CacheKey& key) const { - return Get(key)->GetKernelInfo(key); + return Get(key)->GetKernelInfo(); } void CompilationCache::Insert(const CacheKey& key, const CacheValue& value) { - VLOG(6) << "Insert CompilationCache for: " << key->FuncName(); - cache_.insert({KeyHash(key), value}); + VLOG(6) << "Insert CompilationCache for: " << key; + cache_.insert({key, value}); } void CompilationCache::Clear() { cache_.clear(); } -size_t CompilationCache::KeyHash(const CacheKey& key) const { - // TODO(Aurelius84): use a better hash function in next pr. 
- return std::hash{}(key->FuncName()); -} - } // namespace cinn::hlir::framework diff --git a/paddle/cinn/hlir/framework/pir/compilation_cache.h b/paddle/cinn/hlir/framework/pir/compilation_cache.h index 018bd6fd85572..547a1889f01a6 100644 --- a/paddle/cinn/hlir/framework/pir/compilation_cache.h +++ b/paddle/cinn/hlir/framework/pir/compilation_cache.h @@ -19,6 +19,7 @@ #include "paddle/cinn/backends/compiler.h" #include "paddle/cinn/common/macros.h" #include "paddle/cinn/common/target.h" +#include "paddle/cinn/hlir/framework/pir/fusion_info.h" #include "paddle/cinn/hlir/framework/pir/utils.h" namespace cinn::hlir::framework { @@ -27,76 +28,79 @@ namespace pir { class OpLoweringGroup; class BackendResource final { public: - BackendResource(const Target& target) { - backend_compiler_ = backends::Compiler::Create(target); - } - BackendResource(const Target& target, const std::string& host_fn_name, - const std::string& infer_fn_name) - : host_fn_name_(host_fn_name), infer_fn_name_(infer_fn_name) { + const std::string& infer_fn_name, + const std::map& int_args_map) + : host_fn_name_(host_fn_name), + infer_fn_name_(infer_fn_name), + int_args_map_(int_args_map) { backend_compiler_ = backends::Compiler::Create(target); } void* GetHostFuncPtr() const; void* GetInferFuncPtr() const; - pir::CINNKernelInfo GernerateKernelInfo( - const std::shared_ptr& group) const; - std::shared_ptr& GetBackendCompiler(); - const std::shared_ptr& GetBackendCompiler() const; - void SetHostFnName(const std::string& name); - void SetInferFnName(const std::string& name); + const std::map& GetIntArgsMap() const { + return int_args_map_; + } + const std::shared_ptr& GetBackendCompiler() const { + return backend_compiler_; + } + pir::CINNKernelInfo GenerateKernelInfo() const; private: std::string host_fn_name_; std::string infer_fn_name_; - // std::string host_code_; - // std::vector device_code_; - std::shared_ptr backend_compiler_; + std::map int_args_map_; + + std::shared_ptr 
backend_compiler_{nullptr}; }; class CompilationResult final { public: - explicit CompilationResult(const Target& target) - : target_(target), backend_resource_(target) {} - - BackendResource& MutableBackendResource() { return backend_resource_; } - const BackendResource& GetBackendResource() const { + explicit CompilationResult(const Target& target) : target_(target) {} + const std::shared_ptr& GetBackendResource() const { return backend_resource_; } - pir::CINNKernelInfo GetKernelInfo( - const std::shared_ptr& group) { - return backend_resource_.GernerateKernelInfo(group); + + void SetBackendResource(const std::shared_ptr& other) { + backend_resource_ = other; + } + + pir::CINNKernelInfo GetKernelInfo() { + // TODO(Aurelius84): add ENFORCE_NOT_NULL + return backend_resource_->GenerateKernelInfo(); } private: Target target_; - BackendResource backend_resource_; + std::shared_ptr backend_resource_{nullptr}; }; + } // namespace pir class CompilationCache { public: - using CacheKey = std::shared_ptr; + using CacheKey = pir::FusionInfo; using CacheValue = std::shared_ptr; static CompilationCache& Instance() { - static CompilationCache instance; + thread_local static CompilationCache instance; return instance; } bool Has(const CacheKey& key) const; const CacheValue& Get(const CacheKey& key) const; - pir::CINNKernelInfo GetKernelInfo(const CacheKey& key) const; void Insert(const CacheKey& key, const CacheValue& value); void Clear(); - size_t KeyHash(const CacheKey& key) const; + + pir::CINNKernelInfo GetKernelInfo(const CacheKey& key) const; private: CompilationCache() = default; CINN_DISALLOW_COPY_AND_ASSIGN(CompilationCache); - std::unordered_map cache_; + std::unordered_map cache_; }; } // namespace cinn::hlir::framework diff --git a/paddle/cinn/hlir/framework/pir/compilation_task.cc b/paddle/cinn/hlir/framework/pir/compilation_task.cc index a93ac960d496a..85f4d2849ea80 100644 --- a/paddle/cinn/hlir/framework/pir/compilation_task.cc +++ 
b/paddle/cinn/hlir/framework/pir/compilation_task.cc @@ -42,15 +42,9 @@ std::string GroupCompilationContext::PrintPredicate2Funcs() const { return ss.str(); } -void CompilationTask::operator()() { - VLOG(4) << "Run Compilation Task for : " << context_->group_.get(); - if (CompilationCache::Instance().Has(context_->group_)) { - VLOG(4) << "Found cached kernel info for group: " - << context_->group_->FuncName(); - return; - } +std::shared_ptr CompilationTask::operator()() { Lowering(); - CodegenAndJit(); + return CodegenAndJit(); } void CompilationTask::Lowering() { @@ -62,7 +56,7 @@ void CompilationTask::Lowering() { /* apply pass = */ true)); } -void CompilationTask::CodegenAndJit() { +std::shared_ptr CompilationTask::CodegenAndJit() { ir::Module::Builder builder(cinn::common::UniqName("module"), context_->target_); CHECK_EQ(context_->predicates_.size(), context_->lowered_funcs_.size()); @@ -74,27 +68,22 @@ void CompilationTask::CodegenAndJit() { } builder.SetInferShapeFunc(context_->infer_shape_lowered_func_); ir::Module ir_module = builder.Build(); - BuildPirCINNKernelInfo(ir_module); -} - -pir::CINNKernelInfo CompilationTask::GetCINNKernelInfo() { - if (!CompilationCache::Instance().Has(context_->group_)) { - PADDLE_THROW(phi::errors::NotFound( - "Kernel info has been cached for current group.")); - } - return CompilationCache::Instance().GetKernelInfo(context_->group_); + return BuildPirCINNKernelInfo(ir_module); } -void CompilationTask::BuildPirCINNKernelInfo(const ir::Module& module) { +std::shared_ptr CompilationTask::BuildPirCINNKernelInfo( + const ir::Module& module) { auto compilation_result = std::make_shared(context_->target_); - pir::BackendResource& backend_resource = - compilation_result->MutableBackendResource(); - backend_resource.GetBackendCompiler()->Build(module, ""); - backend_resource.SetHostFnName(context_->group_->FuncName()); - backend_resource.SetInferFnName(context_->group_->FuncName() + - "_infer_shape"); - 
CompilationCache::Instance().Insert(context_->group_, compilation_result); + auto backend_resource = std::make_shared( + context_->target_, + context_->group_->FuncName(), + context_->group_->FuncName() + "_infer_shape", + context_->group_->int_args_map()); + VLOG(5) << "Start to compile module into cuda kernel..."; + backend_resource->GetBackendCompiler()->Build(module, ""); + compilation_result->SetBackendResource(backend_resource); + return compilation_result; } } // namespace framework diff --git a/paddle/cinn/hlir/framework/pir/compilation_task.h b/paddle/cinn/hlir/framework/pir/compilation_task.h index 69e985afd7869..d104d264b6852 100644 --- a/paddle/cinn/hlir/framework/pir/compilation_task.h +++ b/paddle/cinn/hlir/framework/pir/compilation_task.h @@ -50,14 +50,13 @@ class CompilationTask { explicit CompilationTask(GroupCompilationContext* context) : context_(context) {} - void operator()(); - pir::CINNKernelInfo GetCINNKernelInfo(); + std::shared_ptr operator()(); private: void Lowering(); - void CodegenAndJit(); - std::unique_ptr BuildInstruction(); - void BuildPirCINNKernelInfo(const ir::Module& module); + std::shared_ptr CodegenAndJit(); + std::shared_ptr BuildPirCINNKernelInfo( + const ir::Module& module); GroupCompilationContext* context_; }; diff --git a/paddle/cinn/hlir/framework/pir/fusion_info.cc b/paddle/cinn/hlir/framework/pir/fusion_info.cc new file mode 100644 index 0000000000000..f3b1979e6627e --- /dev/null +++ b/paddle/cinn/hlir/framework/pir/fusion_info.cc @@ -0,0 +1,146 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/cinn/hlir/framework/pir/fusion_info.h" +#include "paddle/common/enforce.h" +#include "paddle/pir/include/core/ir_printer.h" + +namespace cinn::hlir::framework::pir { + +constexpr static char* kOpCallStack = "op_callstack"; + +std::size_t AttributeInfo::hash() const { return attr_.hash(); } + +std::ostream& operator<<(std::ostream& os, const AttributeInfo& attr_info) { + os << "AttributeInfo - " << attr_info.name_ << ", " << attr_info.hash(); + if (VLOG_IS_ON(7)) { + os << " ("; + ::pir::IrPrinter(os).PrintAttribute(attr_info.attr_); + os << ")"; + } + return os; +} + +std::size_t ValueInfo::hash() const { return type_.hash(); } + +std::ostream& operator<<(std::ostream& os, const ValueInfo& value_info) { + os << "ValueInfo - " << value_info.hash(); + if (VLOG_IS_ON(7)) { + os << "("; + ::pir::IrPrinter(os).PrintType(value_info.type_); + os << ")"; + } + return os; +} + +OperationInfo::OperationInfo(const ::pir::Operation& op) { + name_ = op.name(); + for (const auto value : op.operands_source()) { + if (!value || !value.type()) continue; + input_infos_.emplace_back(value); + } + for (const auto value : op.results()) { + if (!value || !value.type()) continue; + output_infos_.emplace_back(value); + } + // Keep attribute always in order. 
+ const auto& attributes = op.attributes(); + std::map> order_attributes( + attributes.begin(), attributes.end()); + for (const auto& [attr_name, attr_value] : order_attributes) { + if (!attr_value || attr_name == kOpCallStack) continue; + attr_infos_.emplace_back(attr_name, attr_value); + } +} + +std::size_t OperationInfo::hash() const { + std::size_t seed = 1789; + hash_combine(seed, name_); + for (const auto& info : input_infos_) hash_combine(seed, info); + for (const auto& info : output_infos_) hash_combine(seed, info); + for (const auto& info : attr_infos_) hash_combine(seed, info); + return seed; +} + +std::ostream& operator<<(std::ostream& os, const OperationInfo& op_info) { + os << op_info.name_ << " - " << op_info.hash(); + if (VLOG_IS_ON(7)) { + os << "{\n"; + for (const auto& info : op_info.input_infos_) os << info << "\n"; + for (const auto& info : op_info.output_infos_) os << info << "\n"; + for (const auto& info : op_info.attr_infos_) os << info << "\n"; + os << "}"; + } + return os; +} + +FusionInfo::FusionInfo(const OpLoweringGroup& group) { + for (const auto* op : TopologySort(group)) { + op_infos_.emplace_back(*op); + } +} + +std::size_t FusionInfo::hash() const { + if (cached_hash_value_ != 0U) { + return cached_hash_value_; + } + std::size_t seed = 2153; + for (const auto& info : op_infos_) hash_combine(seed, info); + return seed; +} + +std::ostream& operator<<(std::ostream& os, const FusionInfo& fusion_info) { + os << "FusionInfo - " << fusion_info.hash(); + if (VLOG_IS_ON(5)) { + os << "{\n"; + for (const auto& op_info : fusion_info.op_infos_) os << op_info << "\n"; + os << "}\n"; + } + return os; +} + +std::size_t HashIntArgsMap( + const std::map& int_args_map) { + std::size_t seed = 2153; + for (const auto& [input_idx, dim_idx] : int_args_map) { + hash_combine(seed, input_idx); + hash_combine(seed, dim_idx.arg_idx); + hash_combine(seed, dim_idx.dim_idx); + } + return seed; +} +std::ostream& operator<<( + std::ostream& os, + const std::map& 
int_args_map) { + os << "int_args_map: {\n"; + for (const auto& [input_idx, dim_idx] : int_args_map) { + os << "input_idx: " << input_idx << ":[ " << dim_idx.arg_idx << ", " + << dim_idx.dim_idx << " ]\n"; + } + os << "}\n"; +} + +std::vector TopologySort( + const OpLoweringGroup& group) { + // NOTE(Aurelius84): Use simplest one-by-one order temporaly. + auto* block = group.GetParentBlock(); + std::vector ops; + ops.reserve(block->size()); + for (auto& op : *block) { + ops.push_back(&op); + } + return ops; +} + +} // namespace cinn::hlir::framework::pir diff --git a/paddle/cinn/hlir/framework/pir/fusion_info.h b/paddle/cinn/hlir/framework/pir/fusion_info.h new file mode 100644 index 0000000000000..477e6934319cf --- /dev/null +++ b/paddle/cinn/hlir/framework/pir/fusion_info.h @@ -0,0 +1,118 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once +#include +#include "paddle/cinn/hlir/framework/pir/op_lowering_group.h" + +namespace cinn::hlir::framework::pir { + +class AttributeInfo { + public: + AttributeInfo(const std::string &name, const ::pir::Attribute &attr) + : name_(name), attr_(attr) {} + + std::size_t hash() const; + friend std::ostream &operator<<(std::ostream &os, const AttributeInfo &info); + + private: + std::string name_; + ::pir::Attribute attr_; +}; + +class ValueInfo { + public: + explicit ValueInfo(const ::pir::Value &value) : type_(value.type()) {} + + std::size_t hash() const; + friend std::ostream &operator<<(std::ostream &os, const ValueInfo &info); + + private: + // All value information is in TypeStorage. + ::pir::Type type_; +}; + +class OperationInfo { + public: + explicit OperationInfo(const ::pir::Operation &op); + + std::size_t hash() const; + friend std::ostream &operator<<(std::ostream &os, const OperationInfo &info); + + private: + std::string name_; + std::vector input_infos_; + std::vector output_infos_; + std::vector attr_infos_; +}; + +class FusionInfo { + using IntArgsMap = std::map; + + public: + explicit FusionInfo(const OpLoweringGroup &group); + FusionInfo() = delete; + FusionInfo(const FusionInfo &) = default; + FusionInfo(FusionInfo &&) = default; + + std::size_t hash() const; + + bool operator==(const FusionInfo &other) const { + return this->hash() == other.hash(); + } + friend std::ostream &operator<<(std::ostream &os, const FusionInfo &info); + + private: + std::vector op_infos_; + std::size_t cached_hash_value_{0}; +}; + +std::ostream &operator<<(std::ostream &os, const AttributeInfo &info); +std::ostream &operator<<(std::ostream &os, const ValueInfo &info); +std::ostream &operator<<(std::ostream &os, const OperationInfo &info); +std::ostream &operator<<(std::ostream &os, const FusionInfo &info); + +// See boost.hash_combine for details +template +inline void hash_combine(std::size_t &seed, // NOLINT + const T &v) { + std::hash hasher; + seed 
^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + +std::size_t HashIntArgsMap( + const std::map &int_args_map); +std::ostream &operator<<( + std::ostream &os, + const std::map &int_args_map); +std::vector TopologySort( + const OpLoweringGroup &group); + +} // namespace cinn::hlir::framework::pir + +namespace std { +#define REGISTER_STD_HASH(class_name) \ + template <> \ + struct hash { \ + std::size_t operator()( \ + const cinn::hlir::framework::pir::class_name &obj) const { \ + return obj.hash(); \ + } \ + }; + +REGISTER_STD_HASH(AttributeInfo); +REGISTER_STD_HASH(ValueInfo); +REGISTER_STD_HASH(OperationInfo); +REGISTER_STD_HASH(FusionInfo) +} // namespace std diff --git a/paddle/cinn/hlir/framework/pir/op_lowering_group.cc b/paddle/cinn/hlir/framework/pir/op_lowering_group.cc index 8799c84969a04..f9bfed7c92727 100644 --- a/paddle/cinn/hlir/framework/pir/op_lowering_group.cc +++ b/paddle/cinn/hlir/framework/pir/op_lowering_group.cc @@ -19,6 +19,113 @@ namespace hlir { namespace framework { namespace pir { +::pir::Program* OpLoweringGroup::GetParentProgram() const { + PADDLE_ENFORCE_GT(ops_.size(), + 0, + ::common::errors::PreconditionNotMet( + "Require at least one op in the group.")); + PADDLE_ENFORCE_NOT_NULL( + ops_[0], + ::common::errors::Unavailable("Found group.ops_[0] is nullptr.")); + return ops_[0]->GetParentProgram(); +} + +::pir::Block* OpLoweringGroup::GetParentBlock() const { + PADDLE_ENFORCE_GT(this->ops_.size(), + 0, + ::common::errors::PreconditionNotMet( + "Required at least one operation in OpLoweringGroup.")); + auto* block = this->ops_[0]->GetParent(); + PADDLE_ENFORCE_NOT_NULL( + block, + ::common::errors::Unavailable( + "Required inner op's parent block must not be nullptr.")); + for (size_t i = 1; i < this->ops_.size(); ++i) { + PADDLE_ENFORCE_EQ(this->ops_[0]->GetParent(), + block, + ::common::errors::PreconditionNotMet( + "Required all ops must belong into same block.")); + } + + return block; +} + +std::vector<::pir::Value> 
OpLoweringGroup::GetGroupOutputValues() const { + std::unordered_set<::pir::Operation*> group_ops_set(this->ops_.begin(), + this->ops_.end()); + + std::vector<::pir::Value> output_values; + for (auto* op : this->ops_) { + for (size_t i = 0; i < op->num_results(); ++i) { + auto result = op->result(i); + if (!result) { + continue; + } + for (auto use_iter = result.use_begin(); use_iter != result.use_end(); + ++use_iter) { + auto* use_op = use_iter->owner(); + if (group_ops_set.find(use_op) == group_ops_set.end()) { + output_values.push_back(result); + break; + } + } + } + } + return output_values; +} + +std::unordered_set<::pir::Value> OpLoweringGroup::GetInputOpValues() const { + std::unordered_set<::pir::Value> group_inputs; + std::unordered_set<::pir::Operation*> ops_set(this->ops_.begin(), + this->ops_.end()); + + // count all op's input Value + for (auto op : ops_set) { + for (auto& value : op->operands_source()) { + if (!value || !value.type() || ops_set.count(value.defining_op())) + continue; + // if the input value owner op is not in OpSet, it's the group's input + group_inputs.insert(value); + } + } + return group_inputs; +} + +std::unordered_set<::pir::Value> OpLoweringGroup::GetOutputOpValues() const { + std::unordered_set<::pir::Value> group_outputs; + + for (auto op : this->output_ops_) { + for (auto& result : op->results()) { + if (!result || result.type()) { + continue; + } + + group_outputs.insert(result); + } + } + return group_outputs; +} + +const symbol::ShapeOrDataDimExprs& OpLoweringGroup::GetShapeOrDataExprs( + const ::pir::Value& value) const { + PADDLE_ENFORCE_EQ(HasShapeOrDataExprs(value), + true, + ::common::errors::NotFound( + "value not found in value_to_shape_or_data_exprs_")); + return value_to_shape_or_data_exprs_.at(value); +} + +void OpLoweringGroup::SetShapeOrDataExprs( + const ::pir::Value& value, + const symbol::ShapeOrDataDimExprs& shape_or_data) { + auto iter = value_to_shape_or_data_exprs_.find(value); + if (iter == 
value_to_shape_or_data_exprs_.end()) { + value_to_shape_or_data_exprs_.emplace(value, shape_or_data); + } else { + iter->second = shape_or_data; + } +} + std::shared_ptr OpLoweringGroup::Clone( ::pir::Block* target_block, ::pir::IrMapping* ir_mapping) const { std::vector<::pir::Operation*> new_ops; @@ -46,7 +153,6 @@ std::shared_ptr OpLoweringGroup::Clone( new_group->input_names_ = this->input_names_; new_group->output_names_ = this->output_names_; - new_group->fn_name_ = this->fn_name_; new_group->int_args_map_ = this->int_args_map_; new_group->alignment_schedule_info_ = this->alignment_schedule_info_; new_group->reduce_axis_ = this->reduce_axis_; diff --git a/paddle/cinn/hlir/framework/pir/op_lowering_group.h b/paddle/cinn/hlir/framework/pir/op_lowering_group.h index aaa2f31f0a60c..bfaf843cdf5f0 100644 --- a/paddle/cinn/hlir/framework/pir/op_lowering_group.h +++ b/paddle/cinn/hlir/framework/pir/op_lowering_group.h @@ -22,6 +22,7 @@ #include "paddle/cinn/common/context.h" #include "paddle/cinn/hlir/framework/op.h" #include "paddle/cinn/hlir/framework/pir/utils.h" +#include "paddle/common/enforce.h" #include "paddle/pir/include/core/builtin_type_interfaces.h" #include "paddle/pir/include/core/operation.h" #include "paddle/pir/include/core/value.h" @@ -38,124 +39,34 @@ namespace framework { namespace pir { class OpLoweringGroup { public: - OpLoweringGroup() = default; OpLoweringGroup(const OpLoweringGroup&) = delete; OpLoweringGroup(OpLoweringGroup&&) = delete; explicit OpLoweringGroup(const std::vector<::pir::Operation*>& group_ops) - : ops_(group_ops) {} - - explicit OpLoweringGroup(std::initializer_list<::pir::Operation*> group_ops) - : ops_(group_ops) {} - - struct SharedGroupHasher { - size_t operator()( - const std::shared_ptr& group) const noexcept { - return std::hash()(group->group_id()); - } - }; - struct SharedGroupComparator { - bool operator()( - const std::shared_ptr& first, - const std::shared_ptr& second) const noexcept { - return first->group_id() 
== second->group_id(); - } - }; - - std::vector<::pir::Value> GetGroupOutputValues() const { - std::unordered_set<::pir::Operation*> group_ops_set(this->ops_.begin(), - this->ops_.end()); - - std::vector<::pir::Value> output_values; - for (auto* op : this->ops_) { - for (size_t i = 0; i < op->num_results(); ++i) { - auto result = op->result(i); - if (!result) { - continue; - } - for (auto use_iter = result.use_begin(); use_iter != result.use_end(); - ++use_iter) { - auto* use_op = use_iter->owner(); - if (group_ops_set.find(use_op) == group_ops_set.end()) { - output_values.push_back(result); - break; - } - } - } - } - return output_values; - } - - std::unordered_set<::pir::Value> GetInputOpValues() const { - std::unordered_set<::pir::Value> group_inputs; - - std::unordered_set<::pir::Operation*> ops_set; - for (auto op : this->ops_) { - ops_set.insert(op); - } - - // count all op's input Value - for (auto op : this->ops_) { - for (auto& value : op->operands_source()) { - if (!value || !value.type()) { - continue; - } - - if (!ops_set.count(value.defining_op())) { - // if the input value owner op is not in OpSet, it's the group's input - group_inputs.insert(value); - continue; - } - } - } - - return group_inputs; + : ops_(group_ops) { + fn_name_ = CompatibleInfo::GroupOpsName(ops_); } - std::unordered_set<::pir::Value> GetOutputOpValues() const { - std::unordered_set<::pir::Value> group_outputs; - - for (auto op : this->output_ops_) { - for (auto& result : op->results()) { - if (!result || result.type()) { - continue; - } - - group_outputs.insert(result); - } - } - return group_outputs; - } - - std::string FuncName() const { - if (fn_name_ == "") { - // TODO(Aurelius84): Polish this implementation. 
- const_cast(this)->fn_name_ = - CompatibleInfo::GroupOpsName(ops_); - } - return this->fn_name_; + explicit OpLoweringGroup(std::initializer_list<::pir::Operation*> group_ops) + : ops_(group_ops) { + fn_name_ = CompatibleInfo::GroupOpsName(ops_); } + const std::string& FuncName() const { return this->fn_name_; } + ::pir::Block* GetParentBlock() const; + ::pir::Program* GetParentProgram() const; + std::vector<::pir::Value> GetGroupOutputValues() const; + std::unordered_set<::pir::Value> GetInputOpValues() const; + std::unordered_set<::pir::Value> GetOutputOpValues() const; const symbol::ShapeOrDataDimExprs& GetShapeOrDataExprs( - const ::pir::Value& value) const { - CHECK(value_to_shape_or_data_exprs_.count(value)) - << "value not found in value_to_shape_or_data_exprs_"; - return value_to_shape_or_data_exprs_.at(value); - } + const ::pir::Value& value) const; bool HasShapeOrDataExprs(const ::pir::Value& value) const { return value_to_shape_or_data_exprs_.count(value); } void SetShapeOrDataExprs(const ::pir::Value& value, - const symbol::ShapeOrDataDimExprs& shape_or_data) { - auto iter = value_to_shape_or_data_exprs_.find(value); - if (iter == value_to_shape_or_data_exprs_.end()) { - value_to_shape_or_data_exprs_.emplace(value, shape_or_data); - } else { - iter->second = shape_or_data; - } - } + const symbol::ShapeOrDataDimExprs& shape_or_data); void WalkOps(const std::function& VisitOp) const { for (const auto& op : ops_) { @@ -164,23 +75,17 @@ class OpLoweringGroup { } const std::vector<::pir::Operation*>& ops() const { return ops_; } - std::vector<::pir::Operation*>& mut_ops() { return ops_; } - void SetOps(const std::vector<::pir::Operation*>& new_ops) { ops_ = new_ops; } const std::vector& input_names() const { return this->input_names_; } - std::vector& mut_input_names() { return this->input_names_; } - const std::vector& output_names() const { return this->output_names_; } - std::vector& mut_output_names() { return this->output_names_; } - const 
std::vector<::pir::Value>& output_values() const { return this->output_values_; } @@ -188,22 +93,25 @@ class OpLoweringGroup { std::vector<::pir::Value>& mut_output_values() { return this->output_values_; } - const std::unordered_set<::pir::Operation*>& output_ops() const { return this->output_ops_; } - std::unordered_set<::pir::Operation*>& mut_output_ops() { return this->output_ops_; } std::shared_ptr mut_map_expr_ctx() { - CHECK_NOTNULL(map_expr_ctx_); + PADDLE_ENFORCE_NOT_NULL( + map_expr_ctx_, + ::common::errors::Unavailable("Required map_expr_ctx_ != nullptr.")); return map_expr_ctx_; } const adt::MapExprCtx& map_expr_ctx() const { - return *CHECK_NOTNULL(map_expr_ctx_); + PADDLE_ENFORCE_NOT_NULL( + map_expr_ctx_, + ::common::errors::Unavailable("Required map_expr_ctx_ != nullptr.")); + return *map_expr_ctx_; } void set_value_to_shape_or_data_exprs( @@ -285,6 +193,7 @@ class OpLoweringGroup { std::string group_id_{common::UniqName("group_")}; // op in this group std::vector<::pir::Operation*> ops_; + std::string fn_name_; // output ops of the group. std::unordered_set<::pir::Operation*> output_ops_; // op pattern kind. 
@@ -293,7 +202,6 @@ class OpLoweringGroup { std::vector input_names_; std::vector output_names_; std::vector<::pir::Value> output_values_; - std::string fn_name_{""}; std::map int_args_map_; alignment_schedule_info_t alignment_schedule_info_; diff --git a/paddle/cinn/hlir/framework/pir/op_lowering_impl.cc b/paddle/cinn/hlir/framework/pir/op_lowering_impl.cc index eea87c639cc96..bab37b959ddfc 100644 --- a/paddle/cinn/hlir/framework/pir/op_lowering_impl.cc +++ b/paddle/cinn/hlir/framework/pir/op_lowering_impl.cc @@ -204,6 +204,7 @@ BucketLoweredFuncsWrapper OpLowererImpl::BucketLower( if (ops.size() == 1 && ops[0]->name() == "custom_call") { return {{{ir::Expr(1), LowerCustomCall(group)[0]}}, ir::LoweredFunc()}; } + std::vector group_func_arg_tensors; std::unordered_map<::pir::Value, ir::Tensor> tensor_map; // for some op, it will output more tmp value and regard as diff --git a/paddle/cinn/hlir/framework/pir/trivial_op_impl.cc b/paddle/cinn/hlir/framework/pir/trivial_op_impl.cc index 8b97871211a55..91cc298a044c7 100644 --- a/paddle/cinn/hlir/framework/pir/trivial_op_impl.cc +++ b/paddle/cinn/hlir/framework/pir/trivial_op_impl.cc @@ -385,8 +385,8 @@ bool FusionNode::IsTrivial() const { bool CheckAllLoopRangeEq(ReduceOp reduce_upper, TrivialOp trivial_down) {} -std::vector TransformReduceLoopRange(const ReduceOp& upstream, - FusibleOp* downstream) { +std::vector FusionGraph::TransformReduceLoopRange( + const ReduceOp& upstream, FusibleOp* downstream) { // downstream will be mutated by this transform. 
VLOG(4) << "RRTransform begin"; VLOG(4) << "RRTransform Upstream is \n" << _GetRootExpr(upstream); @@ -396,13 +396,20 @@ std::vector TransformReduceLoopRange(const ReduceOp& upstream, modified_downstream_compute_body, GetOutputTensor(upstream)); std::vector results; ir::Tensor downstream_output_tensor = GetOutputTensor(*downstream); + + bool is_trivial_downstream = std::holds_alternative(*downstream); + const auto create_new_tensor = [&](const ir::Tensor& downstream_load_tensor) { VLOG(4) << "Create New Tensor Start"; ir::Tensor result = ir::Tensor( downstream_load_tensor->name + "_" + FusionNode::GetTensorCounter(), downstream_load_tensor->type(), - downstream_output_tensor->shape, - downstream_output_tensor->domain, + is_trivial_downstream + ? FilterWithFakeReduceIter(downstream_output_tensor->shape) + : downstream_output_tensor->shape, + is_trivial_downstream + ? FilterWithFakeReduceIter(downstream_output_tensor->domain) + : downstream_output_tensor->domain, GetOutputTensor(upstream)->operation, GetReduceIters(upstream)); result->WithBuffer(); @@ -414,7 +421,9 @@ std::vector TransformReduceLoopRange(const ReduceOp& upstream, const auto& new_tensor = create_new_tensor(load_tensor.As()->tensor.as_tensor_ref()); ir::Expr new_reduce = CreateReduceExpr( - GetOutputIters(*downstream), + is_trivial_downstream + ? 
FilterWithFakeReduceIter(GetOutputIters(*downstream)) + : GetOutputIters(*downstream), GetReduceIters(upstream), GetInitExpr(upstream), ComposeUtils::CopyedReplaceExpr(GetComputeBody(upstream), @@ -423,10 +432,15 @@ std::vector TransformReduceLoopRange(const ReduceOp& upstream, new_tensor, GetOutputTensor(upstream)); results.emplace_back(ReduceOp(new_reduce)); + VLOG(4) << "After Tmp Transform, upstream is : \n" + << _GetRootExpr(results.back()); ExprTransformerUtils::ReplaceTarget( &modified_downstream_compute_body, load_tensor, - new_tensor(ComposeUtils::VarVec2ExprVec(GetOutputIters(*downstream)))); + new_tensor(ComposeUtils::VarVec2ExprVec( + is_trivial_downstream + ? FilterWithFakeReduceIter(GetOutputIters(*downstream)) + : GetOutputIters(*downstream)))); } _SetFuncBody(*downstream, CreateExprWithNewComputeBody(*downstream, @@ -436,7 +450,8 @@ std::vector TransformReduceLoopRange(const ReduceOp& upstream, return results; } -FusibleOp TrivialFusion(FusionNode* upstream, FusionNode* downstream) { +FusibleOp FusionGraph::TrivialFusion(FusionNode* upstream, + FusionNode* downstream) { CHECK(upstream->IsTrivial()); if (downstream->IsTrivial()) { return TrivalxOther_Fusion(std::get(upstream->fusible_op), @@ -447,21 +462,55 @@ FusibleOp TrivialFusion(FusionNode* upstream, FusionNode* downstream) { } } -FusibleOp SinkTrivialLoopAlign(TrivialOp trivial_op, ReduceOp reduce_op) { +FusibleOp FusionGraph::SinkTrivialLoopAlign(TrivialOp trivial_op, + ReduceOp reduce_op) { + VLOG(4) << "SinkTrivialLoopAlign"; ir::Expr new_trivial_body = ir::ir_utils::IRCopy(trivial_op.GetFuncBody()); - ir::Var last_iter = GetOutputIters(trivial_op).back(); - ir::Expr trivial_last_for = (ExprSetFinderUtils::ChildFors * - ExprSetFinderUtils::IsForIterVar(last_iter)) - .GetSingle(new_trivial_body); + std::vector all_out_iter_vars = GetOutputIters(trivial_op); + std::vector non_reduce_iter_vars = + FilterWithFakeReduceIter(all_out_iter_vars); + std::vector fake_reduce_iter_vars; + for (const 
auto& idx : fake_reduce_iter_idx_) { + fake_reduce_iter_vars.emplace_back( + all_out_iter_vars.at(static_cast(idx))); + } + + VLOG(4) << "all_out_iter_vars: " + << cinn::utils::Join(all_out_iter_vars, ", "); + VLOG(4) << "non_reduce_iter_vars: " + << cinn::utils::Join(non_reduce_iter_vars, ", "); + VLOG(4) << "fake_reduce_iter_vars: " + << cinn::utils::Join(fake_reduce_iter_vars, ", "); + + ir::Expr trivial_last_for = + (ExprSetFinderUtils::ChildFors * + ExprSetFinderUtils::IsForIterVar(all_out_iter_vars.back())) + .GetSingle(new_trivial_body); ir::Expr new_for_body = trivial_last_for.As()->body; - new_for_body = ExprTransformerUtils::WrapForsTransformer( - GetReduceIters(reduce_op))(new_for_body); - trivial_last_for.As()->body = new_for_body; - return TrivialOp(new_trivial_body); + + const auto ExpandIterVars = [&]() { + std::vector result = + ComposeUtils::ConcatVector(non_reduce_iter_vars, fake_reduce_iter_vars); + auto upstream_reduce_iters = GetReduceIters(reduce_op); + if (fake_reduce_iter_vars.size() != upstream_reduce_iters.size()) { + result.insert(result.end(), + upstream_reduce_iters.begin(), + upstream_reduce_iters.end()); + } + VLOG(4) << "ExpandIterVars: " << cinn::utils::Join(result, ", "); + return result; + }; + + ir::Expr new_schedule_realizer = + (ExprTransformerUtils::WrapForsTransformer(ExpandIterVars()) * + ExprTransformerUtils::WrapScheduleRealizer({}, "root"))(new_for_body); + + VLOG(4) << "new_schedule_realizer\n" << new_schedule_realizer; + return TrivialOp(new_schedule_realizer); } -std::vector ReduceTransformRecursive(FusibleOp root_op, - FusionNode* fusion_tree) { +std::vector FusionGraph::ReduceTransformRecursive( + FusibleOp root_op, FusionNode* fusion_tree) { VLOG(4) << "ReduceTransformRecursive: " << *_GetFuncBodyPointer(root_op); std::vector result; for (auto& pair : fusion_tree->upstream) { @@ -485,7 +534,7 @@ std::vector ReduceTransformRecursive(FusibleOp root_op, return result; } -std::vector ReduceTransform(FusionNode* 
downstream) { +std::vector FusionGraph::ReduceTransform(FusionNode* downstream) { if (downstream->IsTrivial() && downstream->upstream.empty()) { return {downstream->fusible_op}; } @@ -512,30 +561,39 @@ std::vector FilterVector(const std::vector& ops, const F& f) { return res; } -FusionGraph::FusionGraph(const std::vector<::pir::Operation*>& ops, - const std::vector& op_compute_bodies) { - // shardable_axes_ = InferShardableAxes(ops); +FusionGraph::FusionGraph( + const cinn::frontend::group_cluster::PatternNodePtr& pattern_node, + const std::unordered_map<::pir::Operation*, ir::Expr>& op_expr_map) { VLOG(4) << "CreateFusionGraph"; - const auto& filtered_ops = FilterVector(ops, [](const ::pir::Operation* op) { - if (op->name() == "cinn_op.generate_shape") { - return false; - } - return true; - }); - const auto& op_patterns = GetOpPatternKindVector(filtered_ops); + + std::vector<::pir::Operation*> ops = pattern_node->GetOps(); + std::vector op_compute_bodies = std::vector(); + std::transform(ops.begin(), + ops.end(), + std::back_inserter(op_compute_bodies), + [&](::pir::Operation* op) { return op_expr_map.at(op); }); + + if (pattern_node->IsReduceTrivial()) { + fake_reduce_iter_idx_ = + std::get( + pattern_node->stmt_pattern_) + .fake_reduce_iter_idx; + } + + const auto& op_patterns = GetOpPatternKindVector(ops); CheckFusionInputValid(op_compute_bodies, op_patterns); std::unordered_map<::pir::Operation*, FusionNode*> op_to_node_map; - for (int i = 0; i < filtered_ops.size(); ++i) { + for (int i = 0; i < ops.size(); ++i) { FusionNode* node = new FusionNode(CreateFusibleOp(op_compute_bodies[i], op_patterns[i])); - op_to_node_map[filtered_ops[i]] = node; + op_to_node_map[ops[i]] = node; all_fusion_nodes_.emplace(node); - node->expr_related_op = filtered_ops[i]; + node->expr_related_op = ops[i]; } - for (::pir::Operation* op : filtered_ops) { + for (::pir::Operation* op : ops) { FusionNode* cur_node = op_to_node_map[op]; // add upstream nodes @@ -769,11 +827,28 @@ 
FusionNode* FusionGraph::FindReduceUpstream(FusionNode* node) { } // namespace trivial_fusion_detail std::vector OperationFusion( - const std::vector<::pir::Operation*>& ops, + const std::vector<::pir::Operation*>& original_ops, const std::vector& op_compute_bodies) { - trivial_fusion_detail::FusionGraph graph = - trivial_fusion_detail::FusionGraph(ops, op_compute_bodies); - auto output = graph.DoFusion(); + const auto& ops = trivial_fusion_detail::FilterVector( + original_ops, [](const ::pir::Operation* op) { + if (op->name() == "cinn_op.generate_shape") { + return false; + } + return true; + }); + + auto output = std::vector(); + auto op_expr_map = + trivial_fusion_detail::ComposeUtils::MakeMap(ops, op_compute_bodies); + + auto frontend_cluster_result = cinn::frontend::ClusterOps(ops); + for (const auto& frontend_node : frontend_cluster_result) { + trivial_fusion_detail::FusionGraph graph = + trivial_fusion_detail::FusionGraph(frontend_node, op_expr_map); + output = trivial_fusion_detail::ComposeUtils::ConcatVector( + output, graph.DoFusion()); + } + VLOG(4) << "Fusion Result: output size is " << output.size(); for (const auto& expr : output) { VLOG(4) << expr; diff --git a/paddle/cinn/hlir/framework/pir/trivial_op_impl.h b/paddle/cinn/hlir/framework/pir/trivial_op_impl.h index f5964ad854848..27b8705db107b 100644 --- a/paddle/cinn/hlir/framework/pir/trivial_op_impl.h +++ b/paddle/cinn/hlir/framework/pir/trivial_op_impl.h @@ -13,8 +13,10 @@ // limitations under the License. 
#pragma once +#include #include +#include "paddle/cinn/frontend/group_cluster/group_cluster.h" #include "paddle/cinn/hlir/dialect/operator/ir/manual_op.h" #include "paddle/cinn/hlir/framework/compile_error.h" #include "paddle/cinn/hlir/framework/pir/op_lowering_util.h" @@ -121,73 +123,80 @@ struct FusionNode { bool IsTrivial() const; }; -template -DownStreamOp TrivalxOther_Fusion(TrivialOp upstream, DownStreamOp downstream) { - VLOG(4) << "Trivial x OtherFusion begin."; - - const auto& replaced_tensor = GetOutputTensor(upstream); - VLOG(4) << "upstream is " << upstream.GetFuncBody(); - VLOG(4) << "downstream is " << downstream.GetFuncBody(); - - ir::Expr modified_body = ir::ir_utils::IRCopy(downstream.GetFuncBody()); - SequenceMutator( - ComposeUtils::GetEachTensorLoadExpr(modified_body, replaced_tensor), - &modified_body, - [&](const ir::Expr& downstream_load_expr, ir::Expr* downstream_body) { - ComposeUtils::ReplaceDownstreamLoadExprWithUpstreamComputeBody( - upstream, downstream_load_expr, downstream_body); - }); - - VLOG(4) << "TTFusion end:\n" << modified_body; - return DownStreamOp(modified_body); -} - bool CheckAllLoopRangeEq(ReduceOp reduce_upper, TrivialOp trivial_down); -std::vector TransformReduceLoopRange(const ReduceOp& upstream, - FusibleOp* downstream); - -FusibleOp TrivialFusion(FusionNode* upstream, FusionNode* downstream); - -FusibleOp SinkTrivialLoopAlign(TrivialOp trivial_op, ReduceOp reduce_op); - -std::vector ReduceTransformRecursive(FusibleOp root_op, - FusionNode* fusion_tree); -std::vector ReduceTransform(FusionNode* downstream); - FusibleOp CreateFusibleOp(ir::Expr compute_body, OpPatternKind op_pattern); struct FusionGraph { - explicit FusionGraph(const std::vector<::pir::Operation*>& ops, - const std::vector& op_compute_bodies); - + explicit FusionGraph( + const cinn::frontend::group_cluster::PatternNodePtr& pattern_node, + const std::unordered_map<::pir::Operation*, ir::Expr>& op_expr_map); ~FusionGraph(); std::vector DoFusion(); 
private: FusionNode* FindTrivialFusibleNode(); - void DoTrivialFusion(); - void ReduceLoopTranform(); - void SplitReduceTransform(); - std::vector GetExprResults(); - void RemoveNode(FusionNode* node); - void AppendNode(FusionNode* node); - FusionNode* FindReduceUpstream(FusionNode* node); + private: + FusibleOp TrivialFusion(FusionNode* upstream, FusionNode* downstream); + + template + DownStreamOp TrivalxOther_Fusion(TrivialOp upstream, + DownStreamOp downstream) { + VLOG(4) << "Trivial x OtherFusion begin."; + + const auto& replaced_tensor = GetOutputTensor(upstream); + VLOG(4) << "upstream is " << upstream.GetFuncBody(); + VLOG(4) << "downstream is " << downstream.GetFuncBody(); + + ir::Expr modified_body = ir::ir_utils::IRCopy(downstream.GetFuncBody()); + SequenceMutator( + ComposeUtils::GetEachTensorLoadExpr(modified_body, replaced_tensor), + &modified_body, + [&](const ir::Expr& downstream_load_expr, ir::Expr* downstream_body) { + ComposeUtils::ReplaceDownstreamLoadExprWithUpstreamComputeBody( + upstream, downstream_load_expr, downstream_body); + }); + + VLOG(4) << "TTFusion end:\n" << modified_body; + return DownStreamOp(modified_body); + } + + std::vector ReduceTransform(FusionNode* downstream); + std::vector ReduceTransformRecursive(FusibleOp root_op, + FusionNode* fusion_tree); + std::vector TransformReduceLoopRange(const ReduceOp& upstream, + FusibleOp* downstream); + FusibleOp SinkTrivialLoopAlign(TrivialOp trivial_op, ReduceOp reduce_op); + + template + std::vector FilterWithFakeReduceIter(const std::vector& input) { + std::vector result; + for (size_t i = 0; i < input.size(); i++) { + if (std::find(fake_reduce_iter_idx_.begin(), + fake_reduce_iter_idx_.end(), + i) == fake_reduce_iter_idx_.end()) { + result.emplace_back(input.at(i)); + } + } + return result; + } + private: std::unordered_set all_fusion_nodes_; std::vector fusion_results_; std::unordered_set entrance_nodes_; std::unordered_set exit_nodes_; + std::vector fake_reduce_iter_idx_; // 
std::unordered_map<::pir::Value, ShardableAxes> shardable_axes_; }; diff --git a/paddle/cinn/hlir/framework/pir/trivial_op_util.cc b/paddle/cinn/hlir/framework/pir/trivial_op_util.cc index 9b776aae4e454..c930aa8a8fd95 100644 --- a/paddle/cinn/hlir/framework/pir/trivial_op_util.cc +++ b/paddle/cinn/hlir/framework/pir/trivial_op_util.cc @@ -502,7 +502,7 @@ void CheckFusionInputValid(const std::vector& op_compute_bodies, const std::vector& op_patterns) { if (VLOG_IS_ON(4)) { for (const auto& func : op_compute_bodies) { - VLOG(4) << "TrivialOpFusion: {FuncBody is} :" << func; + VLOG(4) << "FuncBody is :" << func; } for (const auto& op_ptn : op_patterns) { VLOG(4) << "OpPattern is :" << op_ptn; diff --git a/paddle/cinn/hlir/framework/pir/trivial_op_util.h b/paddle/cinn/hlir/framework/pir/trivial_op_util.h index e28cad31310f7..9dbddc6ada18c 100644 --- a/paddle/cinn/hlir/framework/pir/trivial_op_util.h +++ b/paddle/cinn/hlir/framework/pir/trivial_op_util.h @@ -46,6 +46,18 @@ std::vector ConcatVector(const std::vector& first, return result; } +template +std::unordered_map MakeMap(const std::vector& keys, + const std::vector& values) { + std::unordered_map result = std::unordered_map(); + + CHECK(keys.size() == values.size()); + for (int i = 0; i < keys.size(); i++) { + result[keys[i]] = values[i]; + } + return result; +} + std::vector ExprVec2VarVec(const std::vector& in); std::vector VarVec2ExprVec(const std::vector& in); diff --git a/paddle/cinn/hlir/framework/pir/utils.cc b/paddle/cinn/hlir/framework/pir/utils.cc index 942bf35f3f8eb..5d7d1aa3ac0fa 100644 --- a/paddle/cinn/hlir/framework/pir/utils.cc +++ b/paddle/cinn/hlir/framework/pir/utils.cc @@ -125,23 +125,22 @@ class OpTransInfo { DeParamCondT deny_param_cond_{{"batch_norm", {"ReserveSpace"}}, {"batch_norm_grad", {"ReserveSpace"}}}; - std::unordered_set default_deny_ops_{ - "feed", - "fetch", - "conv2d", - "conv2d_grad", - "depthwise_conv2d", - "depthwise_conv2d_grad", - "dropout", - "pool2d", - "pool2d_grad", - 
"split", - "matmul", - "matmul_grad", - "embedding_grad", - "embedding", - "arange", - }; + std::unordered_set default_deny_ops_{"feed", + "fetch", + "conv2d", + "conv2d_grad", + "depthwise_conv2d", + "depthwise_conv2d_grad", + "dropout", + "pool2d", + "pool2d_grad", + "split", + "matmul", + "matmul_grad", + "embedding_grad", + "embedding", + "arange", + "softmax"}; }; std::string OpNameAfterStripDialect(const ::pir::Operation& op) { @@ -419,12 +418,12 @@ std::string CompatibleInfo::OpFuncName(const ::pir::Operation& op) { std::string CompatibleInfo::GroupOpsName( const std::vector<::pir::Operation*>& ops) { - std::string name = "fn"; + std::string name = "fn_"; for (auto* op : ops) { - std::string op_name = OpName(*op); - name += "_" + cinn::common::Context::Global().NewName(op_name); + name += OpName(*op); + name += "_"; } - return name; + return cinn::common::Context::Global().NewName(name); } std::string CompatibleInfo::ValueName(const ::pir::Value& value) { diff --git a/paddle/cinn/hlir/framework/pir_compiler.cc b/paddle/cinn/hlir/framework/pir_compiler.cc index 2db39508ce1e1..73f2d11f3e1b4 100644 --- a/paddle/cinn/hlir/framework/pir_compiler.cc +++ b/paddle/cinn/hlir/framework/pir_compiler.cc @@ -16,23 +16,128 @@ #include "paddle/cinn/hlir/framework/pir/utils.h" #include "paddle/cinn/utils/multi_threading.h" +#include "paddle/common/enforce.h" +#include "paddle/common/flags.h" + +PD_DECLARE_bool(enable_cinn_compile_cache); namespace cinn::hlir::framework { +class CompilationContextMapper { + public: + CompilationContextMapper(const Target& target, + const std::vector& groups) { + Construct(target, groups); + } + std::vector& UniqueCompilationContexts() { + return group_compilation_contexts_; + } + std::vector>& + MutableCompilationResult() { + return compilation_results_; + } + + std::vector RecoverKernelInfos(); + void UpdateGlobalCache(); + void SetFinalize(bool val) { is_finalized_ = val; } + + private: + void Construct(const Target& target, + const 
std::vector& groups); + std::vector mapper_index_; + std::vector fusion_infos_; + std::vector group_compilation_contexts_; + std::vector> compilation_results_; + + bool is_finalized_{false}; +}; + std::vector PirCompiler::Build( const std::vector& groups) { - std::vector kernel_infos(groups.size()); - for (int i = 0; i < groups.size(); ++i) { - group_compilation_contexts_.emplace_back(target_, groups[i]); + CompilationContextMapper ctx_mapper(target_, groups); + auto& group_compilation_contexts = ctx_mapper.UniqueCompilationContexts(); + auto& compilation_results = ctx_mapper.MutableCompilationResult(); + + const size_t task_size = group_compilation_contexts.size(); + const size_t thread_size = FLAGS_enable_cinn_compile_cache ? task_size : 1; + VLOG(5) << "Found " << task_size << " new groups parsed from " + << groups.size(); + if (task_size > 0) { + auto worker_fn = [&](int index) { + CompilationTask task(&group_compilation_contexts[index]); + compilation_results[index] = task(); + }; + utils::parallel_run(worker_fn, + utils::SequenceDispatcher(0, task_size), + /*thread_num=*/thread_size); } - auto worker_fn = [&](int index) { - CompilationTask task(&group_compilation_contexts_[index]); - task(); - kernel_infos[index] = task.GetCINNKernelInfo(); + ctx_mapper.SetFinalize(true); + ctx_mapper.UpdateGlobalCache(); + return ctx_mapper.RecoverKernelInfos(); +} + +void CompilationContextMapper::Construct( + const Target& target, const std::vector& groups) { + std::unordered_set unique_infos; + const auto IsNewAndUnique = + [&unique_infos](const pir::FusionInfo& info) -> bool { + const bool is_unique = unique_infos.find(info.hash()) == unique_infos.end(); + const bool is_new = !CompilationCache::Instance().Has(info); + return is_new && is_unique; }; - utils::parallel_run( - worker_fn, utils::SequenceDispatcher(0, groups.size()), -1); + + for (size_t i = 0; i < groups.size(); ++i) { + fusion_infos_.emplace_back(*groups[i]); + // If FLAGS_enable_cinn_compile_cache=False, 
Cache strategy will not take + // effects. + if (IsNewAndUnique(fusion_infos_[i]) || !FLAGS_enable_cinn_compile_cache) { + mapper_index_.push_back(i); + group_compilation_contexts_.emplace_back(target, groups[i]); + compilation_results_.push_back( + std::make_shared(target)); + } + unique_infos.insert(fusion_infos_[i].hash()); + } +} + +std::vector +CompilationContextMapper::RecoverKernelInfos() { + PADDLE_ENFORCE_EQ( + is_finalized_, + true, + ::common::errors::PreconditionNotMet( + "Required is_finalized_ = true, please call SetFinalize() firstly.")); + PADDLE_ENFORCE_EQ(group_compilation_contexts_.size(), + compilation_results_.size(), + ::common::errors::PreconditionNotMet( + "Required group_compilation_contexts_.size() = " + "compilation_results_.size().")); + + std::vector kernel_infos(fusion_infos_.size()); + for (size_t i = 0; i < fusion_infos_.size(); ++i) { + kernel_infos[i] = + CompilationCache::Instance().GetKernelInfo(fusion_infos_[i]); + } return kernel_infos; } +void CompilationContextMapper::UpdateGlobalCache() { + PADDLE_ENFORCE_EQ( + is_finalized_, + true, + ::common::errors::PreconditionNotMet( + "Required is_finalized_ = true, please call SetFinalize() firstly.")); + for (size_t i = 0; i < compilation_results_.size(); ++i) { + PADDLE_ENFORCE_LT(mapper_index_[i], + fusion_infos_.size(), + ::common::errors::PreconditionNotMet( + "Required mapper_index < fusion_infos_.size().")); + const auto& fusion_info = fusion_infos_[mapper_index_[i]]; + const auto& int_args_map = + compilation_results_[i]->GetBackendResource()->GetIntArgsMap(); + VLOG(5) << "Insert new compiled result into cache, fusion_info: " + << fusion_info << ", int_args_map: " << int_args_map; + CompilationCache::Instance().Insert(fusion_info, compilation_results_[i]); + } +} } // namespace cinn::hlir::framework diff --git a/paddle/cinn/hlir/framework/pir_compiler.h b/paddle/cinn/hlir/framework/pir_compiler.h index d9429b76a6fa8..9ea83defa0cb9 100644 --- 
a/paddle/cinn/hlir/framework/pir_compiler.h +++ b/paddle/cinn/hlir/framework/pir_compiler.h @@ -31,7 +31,6 @@ class PirCompiler final { CINN_DISALLOW_COPY_AND_ASSIGN(PirCompiler); Target target_; - std::vector group_compilation_contexts_; }; } // namespace cinn::hlir::framework diff --git a/paddle/cinn/hlir/pe/broadcast.cc b/paddle/cinn/hlir/pe/broadcast.cc index fb47ed737fdf3..fab2af9c5f0dc 100644 --- a/paddle/cinn/hlir/pe/broadcast.cc +++ b/paddle/cinn/hlir/pe/broadcast.cc @@ -400,10 +400,7 @@ Tensor BroadcastTo(const Tensor& A, } else if (MathEqual(a_shape_i, out_shape[idx])) { broadcast_indice.push_back(indice[idx]); } else { - std::stringstream ss; - ss << "fail to broad cast input shape " << a_shape_i - << " to output shape " << out_shape[idx]; - PADDLE_THROW(phi::errors::InvalidArgument(ss.str())); + broadcast_indice.push_back(indice[idx] % a_shape_i); } } return A(broadcast_indice); diff --git a/paddle/cinn/ir/schedule/ir_schedule.h b/paddle/cinn/ir/schedule/ir_schedule.h index cab1b0d38d868..7927efdaa277f 100644 --- a/paddle/cinn/ir/schedule/ir_schedule.h +++ b/paddle/cinn/ir/schedule/ir_schedule.h @@ -32,11 +32,11 @@ namespace cinn { namespace ir { /** - * A struct containing all the schedule primitives. Each shedule primitive is a - * member function of IRSchedule. Schedule primitves are implmented by + * A struct containing all the schedule primitives. Each schedule primitive is a + * member function of IRSchedule. Schedule primitives are implemented by * StScheduleImpl manipulating the AST - IR(Expr). To support serializing and * replaying, each schedule primitive should append a ScheduleDesc::Step to the - * trace_ in its corresponding function implment. + * trace_ in its corresponding function implement. */ class IRSchedule { public: @@ -353,7 +353,7 @@ class IRSchedule { * If the rfactor loop is k and rf_axis is 0, the rfactor transformation is * divided into 2 steps: * 1. 
get the rfactor block where the reduce loop k is transformed to the - * serial loop with no accumalation and a new rfactor tensor is created. The + * serial loop with no accumulation and a new rfactor tensor is created. The * axis k will be placed in the rf_axis of the new rf_tensor. The rf_block is * as follows: \code for (rf_k, 0, 30) // rfactor loop k is transformed * to the serial loop. for (i, 0, 10) // serial loop for (j, 0, 20) // @@ -390,7 +390,7 @@ class IRSchedule { * If the rf loop is j and rf_axis is 0, the transformation is * divided into 2 steps: * 1. get the rf block where the reduce loop j is transformed to the - * serial loop with no accumalation and a new rf tensor is created. + * serial loop with no accumulation and a new rf tensor is created. * The axis j will be placed in the rf_axis of the new rf_tensor. * The rf_block is as follows: * \code @@ -457,7 +457,7 @@ class IRSchedule { /*! * \brief Insert a tag in schedule_desc to mark the beginning of post - * processing, the schedue primitive itself does not make any changes to the + * processing, the schedule primitive itself does not make any changes to the * IR. */ void TagPostSchedule(); @@ -491,7 +491,7 @@ class IRSchedule { /*! 
* \brief The base class of the inliner, which handles: * 1) Remove the block to be lined - * 2) Maintain a list of index variables and their substition of the buffer + * 2) Maintain a list of index variables and their substitution of the buffer * being inlined */ class BaseInliner : public ir::IRMutator<> { diff --git a/paddle/cinn/ir/schedule/schedule_base.cc b/paddle/cinn/ir/schedule/schedule_base.cc index b34221d73f052..885391aecd073 100644 --- a/paddle/cinn/ir/schedule/schedule_base.cc +++ b/paddle/cinn/ir/schedule/schedule_base.cc @@ -105,7 +105,7 @@ void ScheduleBase::Broadcast(const std::string& block_name, } std::vector all_loops = this->GetLoops(block_name); if (axes[0] >= all_loops.size()) { - throw std::runtime_error("axes execeed loop size"); + throw std::runtime_error("axes exceed loop size"); } // Get Last loop @@ -150,14 +150,14 @@ void ScheduleBase::Broadcast(const std::string& block_name, auto stride = Expr(1); auto in_offset = Expr(0); - std::set brodacast_set(info.broadcast_axes.begin(), + std::set broadcast_set(info.broadcast_axes.begin(), info.broadcast_axes.end()); for (int i = all_loops.size() - 1; i >= 0; --i) { auto loop_temp = all_loops[i].As(); offset = offset + loop_temp->loop_var * stride; stride = stride * loop_temp->extent; - if (!brodacast_set.count(i)) { + if (!broadcast_set.count(i)) { in_offset = in_offset + loop_temp->loop_var * stride; } } diff --git a/paddle/cinn/pybind/CMakeLists.txt b/paddle/cinn/pybind/CMakeLists.txt index ec409578930df..970203a273389 100755 --- a/paddle/cinn/pybind/CMakeLists.txt +++ b/paddle/cinn/pybind/CMakeLists.txt @@ -15,6 +15,8 @@ set(srcs utils.cc schedule.cc) +gather_srcs(cinnapi_src SRCS ${srcs}) + if(WITH_CUDA) message(STATUS "Compile core_api with CUDA support") cinn_nv_library( diff --git a/paddle/cinn/pybind/bind.cc b/paddle/cinn/pybind/bind.cc index 4c20f22b973cf..6882a1ac87208 100644 --- a/paddle/cinn/pybind/bind.cc +++ b/paddle/cinn/pybind/bind.cc @@ -21,27 +21,28 @@ namespace py = pybind11; 
namespace cinn::pybind { -PYBIND11_MODULE(core_api, m) { - m.doc() = "CINN core API"; - - py::module runtime = m.def_submodule("runtime", "bind cinn_runtime"); - py::module common = m.def_submodule("common", "namespace cinn::common"); - py::module lang = m.def_submodule("lang", "namespace cinn::lang"); - py::module ir = m.def_submodule("ir", "namespace cinn::ir"); - py::module poly = m.def_submodule("poly", "namespace cinn::poly, polyhedral"); - py::module backends = m.def_submodule( +void BindCINN(py::module *m) { + py::module cinn = + m->def_submodule("cinn", "Compiler Infrastructure for Neural Networks"); + py::module runtime = cinn.def_submodule("runtime", "bind cinn_runtime"); + py::module common = cinn.def_submodule("common", "namespace cinn::common"); + py::module lang = cinn.def_submodule("lang", "namespace cinn::lang"); + py::module ir = cinn.def_submodule("ir", "namespace cinn::ir"); + py::module poly = + cinn.def_submodule("poly", "namespace cinn::poly, polyhedral"); + py::module backends = cinn.def_submodule( "backends", "namespace cinn::backends, execution backends"); - py::module optim = - m.def_submodule("optim", "namespace cinn::optim, CINN IR optimization"); - py::module pe = m.def_submodule( + py::module optim = cinn.def_submodule( + "optim", "namespace cinn::optim, CINN IR optimization"); + py::module pe = cinn.def_submodule( "pe", "namespace cinn::hlir::pe, CINN Primitive Emitters"); py::module frontend = - m.def_submodule("frontend", "namespace cinn::frontend, CINN frontend"); - py::module framework = m.def_submodule( + cinn.def_submodule("frontend", "namespace cinn::frontend, CINN frontend"); + py::module framework = cinn.def_submodule( "framework", "namespace cinn::hlir::framework, CINN framework"); py::module utils = - m.def_submodule("utils", "namespace cinn::utils, CINN framework"); - py::module schedule = m.def_submodule( + cinn.def_submodule("utils", "namespace cinn::utils, CINN framework"); + py::module schedule = cinn.def_submodule( 
"schedule", "namespace cinn::ir::schedule, CINN Schedule"); BindRuntime(&runtime); diff --git a/paddle/cinn/pybind/bind.h b/paddle/cinn/pybind/bind.h index 77566097a19aa..bd9f69ece3c7f 100644 --- a/paddle/cinn/pybind/bind.h +++ b/paddle/cinn/pybind/bind.h @@ -53,4 +53,8 @@ void BindFrontend(pybind11::module *m); void BindFramework(pybind11::module *m); void BindUtils(pybind11::module *m); void BindSchedule(pybind11::module *m); + +__attribute__((visibility("default"))) extern void BindCINN( + pybind11::module *m); + } // namespace cinn::pybind diff --git a/paddle/cinn/pybind/frontend.cc b/paddle/cinn/pybind/frontend.cc index b6ba2590f3dad..b21ae95cd9629 100644 --- a/paddle/cinn/pybind/frontend.cc +++ b/paddle/cinn/pybind/frontend.cc @@ -937,7 +937,7 @@ void BindFrontend(pybind11::module *m) { .def("get_cinn_name", [](PaddleModelConvertor &self, const std::string &paddle_name) { CHECK(self.var_model_to_program_map().count(paddle_name)) - << "Cannot find variabel " << paddle_name + << "Cannot find variable " << paddle_name << " in CINN! 
Please check."; return self.var_model_to_program_map().at(paddle_name); }); diff --git a/paddle/cinn/runtime/cpu/CMakeLists.txt b/paddle/cinn/runtime/cpu/CMakeLists.txt index 0971d5a95dbae..804ee29ca5377 100644 --- a/paddle/cinn/runtime/cpu/CMakeLists.txt +++ b/paddle/cinn/runtime/cpu/CMakeLists.txt @@ -4,7 +4,7 @@ gather_srcs(cinnapi_src SRCS host_intrinsics.cc thread_backend.cc) if(WITH_MKL_CBLAS) gather_srcs(cinnapi_src SRCS mkl_math.cc cblas.cc) - if(WITH_MKLDNN) + if(WITH_ONEDNN) gather_srcs(cinnapi_src SRCS onednn_math.cc) endif() endif() @@ -15,7 +15,7 @@ if(WITH_MKL_CBLAS) cinn_cc_test(test_mkl_math SRCS mkl_math_test.cc mkl_math.cc DEPS cinncore) endif() - if(WITH_MKLDNN) + if(WITH_ONEDNN) cinn_cc_test(test_onednn_math SRCS onednn_math_test.cc onednn_math.cc DEPS cinncore) endif() diff --git a/paddle/cinn/runtime/flags.cc b/paddle/cinn/runtime/flags.cc index c310a47f5f180..e4fd6e31f665a 100644 --- a/paddle/cinn/runtime/flags.cc +++ b/paddle/cinn/runtime/flags.cc @@ -75,7 +75,7 @@ PD_DEFINE_bool(group_schedule_tiling_first, "Whether to enable new group scheduler tiling first strategy."); PD_DEFINE_bool(cinn_new_cluster_op_method, - BoolFromEnv("FLAGS_cinn_new_cluster_op_method", false), + BoolFromEnv("FLAGS_cinn_new_cluster_op_method", true), "Whether to enable newly developed clustering method of group " "op for cinn."); diff --git a/paddle/common/enforce.h b/paddle/common/enforce.h index 6076e9089df83..b3027d55c8065 100644 --- a/paddle/common/enforce.h +++ b/paddle/common/enforce.h @@ -362,47 +362,5 @@ inline bool is_error(const T& stat) { } namespace pir { -class IrNotMetException : public std::exception { - public: - explicit IrNotMetException(const std::string& str) - : err_str_(str + ::common::enforce::GetCurrentTraceBackString()) {} - - const char* what() const noexcept override { return err_str_.c_str(); } - - private: - std::string err_str_; - ::common::enforce::details::PaddleFatalGuard paddle_fatal_guard_; -}; - -#define IR_THROW(...) 
\ - do { \ - try { \ - throw pir::IrNotMetException( \ - paddle::string::Sprintf("Error occurred at: %s:%d :\n%s", \ - __FILE__, \ - __LINE__, \ - paddle::string::Sprintf(__VA_ARGS__))); \ - } catch (const std::exception& e) { \ - std::cout << e.what() << std::endl; \ - throw; \ - } \ - } while (0) - -#define IR_ENFORCE(COND, ...) \ - do { \ - bool __cond__(COND); \ - if (UNLIKELY(is_error(__cond__))) { \ - try { \ - throw pir::IrNotMetException( \ - paddle::string::Sprintf("Error occurred at: %s:%d :\n%s", \ - __FILE__, \ - __LINE__, \ - paddle::string::Sprintf(__VA_ARGS__))); \ - } catch (const std::exception& e) { \ - std::cout << e.what() << std::endl; \ - throw; \ - } \ - } \ - } while (0) - +#define IR_THROW(...) PADDLE_THROW(phi::errors::Fatal(__VA_ARGS__)) } // namespace pir diff --git a/paddle/common/flags.cc b/paddle/common/flags.cc index 35237b3a2f51f..ef18cc0c1804e 100644 --- a/paddle/common/flags.cc +++ b/paddle/common/flags.cc @@ -741,13 +741,13 @@ PHI_DEFINE_EXPORTED_bool(set_to_1d, false, "set 0D Tensor to 1D numpy"); /** * Debug related FLAG - * Name: tracer_mkldnn_ops_on + * Name: tracer_onednn_ops_on * Since Version: 2.0.0 * Value Range: string, default=empty * Example: * Note: Holds list of operation types with OneDNN kernels to be enabled. */ -PHI_DEFINE_EXPORTED_string(tracer_mkldnn_ops_on, +PHI_DEFINE_EXPORTED_string(tracer_onednn_ops_on, "", "List of OneDNN operation types to be turned on"); @@ -765,13 +765,13 @@ PHI_DEFINE_EXPORTED_string(static_runtime_data_save_path, /** * Debug related FLAG - * Name: tracer_mkldnn_ops_off + * Name: tracer_onednn_ops_off * Since Version: 2.0.0 * Value Range: string, default=empty * Example: * Note: Holds list of operation types with OneDNN kernels to be disabled. 
*/ -PHI_DEFINE_EXPORTED_string(tracer_mkldnn_ops_off, +PHI_DEFINE_EXPORTED_string(tracer_onednn_ops_off, "", "List of OneDNN operation types to be turned off"); @@ -1025,6 +1025,19 @@ PHI_DEFINE_EXPORTED_string(deny_cinn_ops, "", "It controls the cinn op subset to be not used."); +/* + * CINN related FLAG + * Name: FLAGS_deny_cinn_ops + * Since Version: 3.0 Beta + * Value Range: bool, default=true + * Example: FLAGS_enable_cinn_compile_cache=true would reuse cached Kernel + * function + */ +PHI_DEFINE_EXPORTED_bool( + enable_cinn_compile_cache, + true, + "It controls whether to enable cinn compilation cache."); + /* * CINN related FLAG * Name: FLAGS_enable_pe_launch_cinn @@ -1353,13 +1366,13 @@ PHI_DEFINE_EXPORTED_bool(use_shm_cache, * mmap_allocator related FLAG * Name: dataloader_use_file_descriptor * Since Version: 2.6.2 - * Value Range: bool, default=true + * Value Range: bool, default=false * Example: * Note: . If True, mmap_allocator will use file descripor to open shared memory * operation. */ PHI_DEFINE_EXPORTED_bool(dataloader_use_file_descriptor, - true, + false, "Use file descriptor in mmap_allocator."); /** @@ -1384,7 +1397,7 @@ PHI_DEFINE_EXPORTED_string(tensor_operants_mode, * Since Version: 2.6.0 * Value Range: bool, default=false * Example: - * Note: If Ture, executor will use new IR + * Note: If True, executor will use new IR */ PHI_DEFINE_EXPORTED_bool(enable_pir_in_executor, false, @@ -1397,7 +1410,7 @@ PHI_DEFINE_EXPORTED_bool(enable_pir_in_executor, * Since Version: 2.6.0 * Value Range: bool, default=true * Example: - * Note: If Ture, program will be translated to pir program + * Note: If True, program will be translated to pir program * and then run in executor for dy2st mode. */ PHI_DEFINE_EXPORTED_bool(enable_pir_with_pt_in_dy2st, @@ -1547,7 +1560,7 @@ PHI_DEFINE_EXPORTED_int64(alloc_fill_value, * Since Version: 3.0.0 * Value Range: bool, default=false * Example: - * Note: If Ture, will apply shape_optimization pass to new IR. 
+ * Note: If True, will apply shape_optimization pass to new IR. */ PHI_DEFINE_EXPORTED_bool(pir_apply_shape_optimization_pass, false, diff --git a/paddle/common/union_find_set.h b/paddle/common/union_find_set.h new file mode 100644 index 0000000000000..b00c8ae7de8f5 --- /dev/null +++ b/paddle/common/union_find_set.h @@ -0,0 +1,72 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +namespace common { + +template +class UnionFindSet { + public: + const T& Find(const T& x) const { + if (parent_.find(x) == parent_.end()) { + return x; + } + if (parent_.at(x) != x) return Find(parent_.at(x)); + return parent_.at(x); + } + + const T& Find(const T& x) { + if (parent_.find(x) == parent_.end()) { + return x; + } + if (parent_[x] != x) { + parent_[x] = Find(parent_[x]); + } + return parent_.at(x); + } + + void Union(const T& p, const T& q) { + if (parent_.find(p) == parent_.end()) { + parent_[p] = p; + } + if (parent_.find(q) == parent_.end()) { + parent_[q] = q; + } + parent_[Find(q)] = Find(p); + } + + template + void VisitCluster(const DoEachClusterT& DoEachCluster) const { + std::unordered_map> clusters_map; + for (auto it = parent_.begin(); it != parent_.end(); it++) { + clusters_map[Find(it->first)].emplace_back(it->first); + } + for (const auto& [_, clusters] : clusters_map) { + DoEachCluster(clusters); + } + } + + bool HasSameRoot(const T& p, const T& q) 
const { return Find(p) == Find(q); } + + std::unordered_map* GetMap() { return &parent_; } + + private: + std::unordered_map parent_; +}; + +} // namespace common diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec deleted file mode 100644 index d10ff999f6eb2..0000000000000 --- a/paddle/fluid/API.spec +++ /dev/null @@ -1,33 +0,0 @@ -paddle.incubate.optimizer.PipelineOptimizer (paddle.incubate.optimizer.PipelineOptimizer, ('document', '2e55a29dbeb874934f7a1a1af3a22b8c')) -paddle.incubate.optimizer.PipelineOptimizer.__init__ (ArgSpec(args=['self', 'optimizer', 'num_microbatches', 'start_cpu_core_id'], varargs=None, keywords=None, defaults=(1, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.incubate.optimizer.PipelineOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.audio.features (ArgSpec(), ('document', 'd41d8cd98f00b204e9800998ecf8427e')) -paddle.audio.features.layers.LogMelSpectrogram (ArgSpec(), ('document', 'c38b53606aa89215c4f00d3833e158b8')) -paddle.audio.features.layers.LogMelSpectrogram.forward (ArgSpec(args=['self', 'x'], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'x': }), ('document', '6c14f6f78dc697a6981cf90412e2f1ea')) -paddle.audio.features.layers.LogMelSpectrogram.load_dict (ArgSpec(args=[], varargs='args', varkw='kwargs', defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={}), ('document', '01221a60445ee437f439a8cbe293f759')) -paddle.audio.features.layers.LogMelSpectrogram.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers', 'structured_name_prefix', 'use_hook'], varargs=None, varkw=None, defaults=(None, True, '', True), kwonlyargs=[], kwonlydefaults=None, annotations={}), ('document', '0c01cb0c12220c9426ae49549b145b0b')) -paddle.audio.features.layers.MFCC (ArgSpec(), 
('document', 'bcbe6499830d9228a4f746ddd63b6c0f')) -paddle.audio.features.layers.MFCC.forward (ArgSpec(args=['self', 'x'], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'x': }), ('document', 'd86bcaa345f26851089bfdb3efecd9e7')) -paddle.audio.features.layers.MelSpectrogram (ArgSpec(), ('document', 'adf4012310984568ae9da6170aa89f91')) -paddle.audio.features.layers.MelSpectrogram.forward (ArgSpec(args=['self', 'x'], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'x': }), ('document', '458e9d454c8773091567c6b400f48cf5')) -paddle.audio.features.layers.Spectrogram (ArgSpec(), ('document', '83811af6da032099bf147e3e01a458e1')) -paddle.audio.features.layers.Spectrogram.forward (ArgSpec(args=['self', 'x'], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'x': }), ('document', 'ab11e318fca1410f743b5432394dea35')) -paddle.audio.functional (ArgSpec(), ('document', 'd41d8cd98f00b204e9800998ecf8427e')) -paddle.audio.functional.functional.compute_fbank_matrix (ArgSpec(args=['sr', 'n_fft', 'n_mels', 'f_min', 'f_max', 'htk', 'norm', 'dtype'], varargs=None, varkw=None, defaults=(64, 0.0, None, False, 'slaney', 'float32'), kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'sr': , 'n_fft': , 'n_mels': , 'f_min': , 'f_max': typing.Union[float, NoneType], 'htk': , 'norm': typing.Union[str, float], 'dtype': }), ('document', '3c5411caa6baedb68860b09c81e0147c')) -paddle.audio.functional.functional.create_dct (ArgSpec(args=['n_mfcc', 'n_mels', 'norm', 'dtype'], varargs=None, varkw=None, defaults=('ortho', 'float32'), kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'n_mfcc': , 'n_mels': , 'norm': typing.Union[str, NoneType], 'dtype': }), ('document', 'c9c57550671f9725b053769411d2f65a')) -paddle.audio.functional.functional.fft_frequencies (ArgSpec(args=['sr', 'n_fft', 'dtype'], varargs=None, varkw=None, 
defaults=('float32',), kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'sr': , 'n_fft': , 'dtype': }), ('document', '057b990e79c9c780622407267c0a43c6')) -paddle.audio.functional.functional.hz_to_mel (ArgSpec(args=['freq', 'htk'], varargs=None, varkw=None, defaults=(False,), kwonlyargs=[], kwonlydefaults=None, annotations={'return': typing.Union[paddle.Tensor, float], 'freq': typing.Union[paddle.Tensor, float], 'htk': }), ('document', '7ca01521dd0bf26cd3f72c67f7168dc4')) -paddle.audio.functional.functional.mel_frequencies (ArgSpec(args=['n_mels', 'f_min', 'f_max', 'htk', 'dtype'], varargs=None, varkw=None, defaults=(64, 0.0, 11025.0, False, 'float32'), kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'n_mels': , 'f_min': , 'f_max': , 'htk': , 'dtype': }), ('document', '2af3cf997ed1274214ec240b2b59a98d')) -paddle.audio.functional.functional.mel_to_hz (ArgSpec(args=['mel', 'htk'], varargs=None, varkw=None, defaults=(False,), kwonlyargs=[], kwonlydefaults=None, annotations={'return': typing.Union[float, paddle.Tensor], 'mel': typing.Union[float, paddle.Tensor], 'htk': }), ('document', 'e93b432d382f98c60d7c7599489e7072')) -paddle.audio.functional.functional.power_to_db (ArgSpec(args=['spect', 'ref_value', 'amin', 'top_db'], varargs=None, varkw=None, defaults=(1.0, 1e-10, 80.0), kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'spect': , 'ref_value': , 'amin': , 'top_db': typing.Union[float, NoneType]}), ('document', '28bbb1973e8399e856bfaea0415cecb9')) -paddle.audio.functional.window.get_window (ArgSpec(args=['window', 'win_length', 'fftbins', 'dtype'], varargs=None, varkw=None, defaults=(True, 'float64'), kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'window': typing.Union[str, typing.Tuple[str, float]], 'win_length': , 'fftbins': , 'dtype': }), ('document', '2418d63da10c0cd5da9ecf0a88ddf783')) -paddle.audio.backends (ArgSpec(), ('document', 'd41d8cd98f00b204e9800998ecf8427e')) 
-paddle.audio.backends.init_backend.get_current_audio_backend (ArgSpec(args=[], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'return': }), ('document', '3ff9fd62e8be1f3dc7e34afaf50e1645')) -paddle.audio.backends.init_backend.list_available_backends (ArgSpec(args=[], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'return': typing.List[str]}), ('document', '8eba49f1b69f7ec7fa139a0714a2724e')) -paddle.audio.backends.init_backend.set_backend (ArgSpec(args=['backend_name'], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'backend_name': }), ('document', '9680247dd97274d345dee415e2787527')) -paddle.audio.backends.wave_backend.info (ArgSpec(args=['filepath', 'format'], varargs=None, varkw=None, defaults=(None,), kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'filepath': , 'format': typing.Union[str, NoneType]}), ('document', 'e0ffd3accd942a9b0a4c08463a9f60f6')) -paddle.audio.backends.wave_backend.load (ArgSpec(args=['filepath', 'frame_offset', 'num_frames', 'normalize', 'channels_first', 'format'], varargs=None, varkw=None, defaults=(0, -1, True, True, None), kwonlyargs=[], kwonlydefaults=None, annotations={'return': typing.Tuple[paddle.Tensor, int], 'filepath': typing.Union[str, pathlib.Path], 'frame_offset': , 'num_frames': , 'normalize': , 'channels_first': , 'format': typing.Union[str, NoneType]}), ('document', '4de50575ca516b4b7c7c82c7fdec808f')) -paddle.audio.backends.wave_backend.save (ArgSpec(args=['filepath', 'src', 'sample_rate', 'channels_first', 'compression', 'format', 'encoding', 'bits_per_sample'], varargs=None, varkw=None, defaults=(True, None, None, None, None), kwonlyargs=[], kwonlydefaults=None, annotations={'filepath': , 'src': , 'sample_rate': , 'channels_first': , 'compression': typing.Union[float, NoneType], 'format': typing.Union[str, NoneType], 'encoding': typing.Union[str, NoneType], 'bits_per_sample': 
typing.Union[int, NoneType]}), ('document', '4c85cfcd29a0dcdfc32e74db8c0c3961')) -paddle.audio.datasets (ArgSpec(), ('document', 'd41d8cd98f00b204e9800998ecf8427e')) -paddle.audio.datasets.TESS (ArgSpec(), ('document', '3605f3aa2191ede7ddbe594cd27bb067')) -paddle.audio.datasets.TESS.meta_info (ArgSpec(), ('document', '60d548a6f71629c3b69bcda3a30d4819')) diff --git a/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc b/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc index 5e2be03108294..2d7326f825acc 100644 --- a/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc +++ b/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc @@ -39,7 +39,7 @@ void ComputeInterceptor::PrepareDeps() { for (int64_t i = 0; i < node_->max_run_times(); ++i) { ready_size_map.emplace(i, 0); } - in_readys_.emplace(up.first, std::make_pair(up.second, ready_size_map)); + in_readies_.emplace(up.first, std::make_pair(up.second, ready_size_map)); } for (auto down : downstream) { out_buffs_.emplace(down.first, std::make_pair(down.second, 0)); @@ -106,11 +106,11 @@ InterceptorMessage ComputeInterceptor::PrepareVarsMsg() { } void ComputeInterceptor::IncreaseReady(int64_t up_id, int64_t scope_id) { - auto it = in_readys_.find(up_id); + auto it = in_readies_.find(up_id); PADDLE_ENFORCE_NE(it, - in_readys_.end(), + in_readies_.end(), platform::errors::NotFound( - "Cannot find upstream=%lld in in_readys.", up_id)); + "Cannot find upstream=%lld in in_readies.", up_id)); auto max_ready_size = it->second.first; const auto& ready_scope_map = it->second.second; @@ -171,7 +171,7 @@ bool ComputeInterceptor::IsInputReady() { for (int64_t i = start_micro_step; i < start_micro_step + num_micro_step; ++i) { bool flag = true; - for (auto& ins : in_readys_) { + for (auto& ins : in_readies_) { auto ready_size_map = ins.second.second; flag = flag && (ready_size_map.at(i) != 0); } @@ -268,7 +268,7 @@ void ComputeInterceptor::SendDataReadyToDownStream() { } void 
ComputeInterceptor::ReplyCompletedToUpStream() { - for (auto& ins : in_readys_) { + for (auto& ins : in_readies_) { auto up_id = ins.first; auto ready_size = ins.second.second.at(cur_scope_id_); ready_size -= 1; diff --git a/paddle/fluid/distributed/fleet_executor/compute_interceptor.h b/paddle/fluid/distributed/fleet_executor/compute_interceptor.h index 26205d5ac8264..bb26c62061734 100644 --- a/paddle/fluid/distributed/fleet_executor/compute_interceptor.h +++ b/paddle/fluid/distributed/fleet_executor/compute_interceptor.h @@ -41,7 +41,7 @@ class ComputeInterceptor : public Interceptor { // upstream_id-->(max_ready_size, scope-->ready_size) std::map>> - in_readys_{}; + in_readies_{}; // downstream_id-->(max_buffer_size, used_size) std::map> out_buffs_{}; diff --git a/paddle/fluid/distributed/index_dataset/CMakeLists.txt b/paddle/fluid/distributed/index_dataset/CMakeLists.txt index 0bd11cc214de4..7d6f963e48634 100644 --- a/paddle/fluid/distributed/index_dataset/CMakeLists.txt +++ b/paddle/fluid/distributed/index_dataset/CMakeLists.txt @@ -3,11 +3,11 @@ cc_library( index_wrapper SRCS index_wrapper.cc DEPS index_dataset_proto framework_io) -if(WITH_MKLDNN) +if(WITH_ONEDNN) cc_library( index_sampler SRCS index_sampler.cc - DEPS xxhash index_wrapper eigen3 mkldnn) + DEPS xxhash index_wrapper eigen3 onednn) else() cc_library( index_sampler diff --git a/paddle/fluid/distributed/ps/service/CMakeLists.txt b/paddle/fluid/distributed/ps/service/CMakeLists.txt index eac2585416d8b..42d9dbce2f4d8 100755 --- a/paddle/fluid/distributed/ps/service/CMakeLists.txt +++ b/paddle/fluid/distributed/ps/service/CMakeLists.txt @@ -11,6 +11,10 @@ else() endif() +if(WITH_PSCORE AND NOT WITH_HETERPS) + set(BRPC_DEPS ${BRPC_DEPS} ps_service) +endif() + brpc_library( sendrecv_rpc SRCS diff --git a/paddle/fluid/eager/auto_code_generator/CMakeLists.txt b/paddle/fluid/eager/auto_code_generator/CMakeLists.txt index a6bb716e6b7ad..64950443c0efc 100644 --- 
a/paddle/fluid/eager/auto_code_generator/CMakeLists.txt +++ b/paddle/fluid/eager/auto_code_generator/CMakeLists.txt @@ -1,6 +1,6 @@ add_subdirectory(generator) -set(EAGER_GENERETOR_DEPS +set(EAGER_GENERATOR_DEPS ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} pybind @@ -13,12 +13,12 @@ set(EAGER_GENERETOR_DEPS imperative_flag) if(WITH_CUSTOM_DEVICE) - set(EAGER_GENERETOR_DEPS ${EAGER_GENERETOR_DEPS} + set(EAGER_GENERATOR_DEPS ${EAGER_GENERATOR_DEPS} custom_device_common_op_registry) endif() add_executable(eager_generator eager_generator.cc) -target_link_libraries(eager_generator ${EAGER_GENERETOR_DEPS}) +target_link_libraries(eager_generator ${EAGER_GENERATOR_DEPS}) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) target_link_libraries(eager_generator ${os_dependency_modules}) @@ -93,13 +93,13 @@ if(WIN32) list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/openblas.dll) endif() - if(WITH_MKLDNN) + if(WITH_ONEDNN) message("Copied mkldnn.dll for Eager AutoCodeGen") add_custom_command( OUTPUT ${eager_generator_path}/mkldnn.dll COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_SHARED_LIB} ${eager_generator_path} - DEPENDS mkldnn) + DEPENDS onednn) list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/mkldnn.dll) endif() diff --git a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py index 32b36ecf2eea6..c272e09a9579f 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py @@ -48,7 +48,7 @@ # so we should check parameter(output) with rule of inplace. # But because there is no check in old dygraph mode, in order to # keeping the code compatible, here we also skip inplace check in new dygraph temporarily, -# and this will be fixed in the futrue. +# and this will be fixed in the future. 
inplace_check_blacklist = {"assign_out_"} # Black Ops list that's NO NEED to apply code generation @@ -75,9 +75,12 @@ "tanh_triple_grad", "minimum_double_grad", "maximum_double_grad", + "abs_triple_grad", + "exp_double_grad", + "log_double_grad", ] -# white ops list whose kernel can automaically do type promotion. +# white ops list whose kernel can automatically do type promotion. # future will get this list from same place with static graph. type_promote_white_list = { "add": ["x", "y"], @@ -85,8 +88,8 @@ "where": ["x", "y"], } -# dict of special api that forward api's output will affect bacward api's output -# bacward api's output usually affected by backward api's input +# dict of special api that forward api's output will affect backward api's output +# backward api's output usually affected by backward api's input special_prune_dict = { "matmul_grad": {"x": "grad_y", "y": "grad_x"}, } @@ -289,7 +292,7 @@ class {} : public egr::GradNodeBase {{ // Forward API Call {} - // Log memory infomation + // Log memory information {} // Check NaN and Inf if needed {} @@ -343,7 +346,7 @@ class {} : public egr::GradNodeBase {{ {} // Forward API Call {} - // Log memory infomation + // Log memory information {} // Check NaN and Inf if needed {} @@ -535,8 +538,8 @@ class {} : public egr::GradNodeBase {{ """ TYPE_PROMOTION_LOGIC_TEMPLATE = """ if (phi::NeedTypePromotion({x}.dtype(), {y}.dtype())) {{ - VLOG(5) << "got different data type, run type protmotion automatically."; - LOG_FIRST_N(WARNING, 1) << "got different data type, run type protmotion automatically, this may cause data type been changed."; + VLOG(5) << "got different data type, run type promotion automatically."; + LOG_FIRST_N(WARNING, 1) << "got different data type, run type promotion automatically, this may cause data type been changed."; {op_name} auto promotion_type = phi::GetPromoteDtype(op_name, {x}.dtype(), {y}.dtype()); @@ -1128,7 +1131,7 @@ def GenerateNodeCreationCodes(self, for_backward=False, 
is_inplaced=False): need_pre_contiguous_set.add(name) set_tensor_wrappers = f"{indent}grad_node->SetTensorWrapper_{name}({name}_tmp);" set_input_tensor_wrappers_list.append(set_tensor_wrappers) - else: # Forwad's output as backward's input + else: # Forward's output as backward's input if num_fwd_outputs > 1: # Aligned with forward output position assert name in forward_outputs_position_map, AssertMessage( @@ -3055,7 +3058,7 @@ def GenerateForwardHFile(filepath, forward_function_declaration_str): for i in range(len(api_yaml_paths)): api_yaml_path = api_yaml_paths[i] - # string api is forwrad only + # string api is forward only if not api_yaml_path.endswith('strings_ops.yaml'): backward_yaml_path = backward_yaml_paths[i] else: diff --git a/paddle/fluid/eager/tensor_wrapper.h b/paddle/fluid/eager/tensor_wrapper.h index 71a72db60d8cb..18e72c4f0782a 100644 --- a/paddle/fluid/eager/tensor_wrapper.h +++ b/paddle/fluid/eager/tensor_wrapper.h @@ -142,33 +142,6 @@ class TensorWrapper { } } -#ifndef PADDLE_NO_PYTHON - TensorWrapper(const TensorWrapper& other) { - no_need_buffer_ = other.no_need_buffer_; - intermidiate_tensor_ = other.intermidiate_tensor_; - weak_grad_node_ = other.weak_grad_node_; - inplace_version_snapshot_ = other.inplace_version_snapshot_; - packed_value_ = other.packed_value_; - unpack_hook_ = other.unpack_hook_; - if (packed_value_) { - packed_value_->inc_ref(); - } - } - - TensorWrapper& operator=(const TensorWrapper& other) { - no_need_buffer_ = other.no_need_buffer_; - intermidiate_tensor_ = other.intermidiate_tensor_; - weak_grad_node_ = other.weak_grad_node_; - inplace_version_snapshot_ = other.inplace_version_snapshot_; - packed_value_ = other.packed_value_; - unpack_hook_ = other.unpack_hook_; - if (packed_value_) { - packed_value_->inc_ref(); - } - return *this; - } -#endif - paddle::Tensor recover() { VLOG(6) << "Recover tensor: " << intermidiate_tensor_.name() << " for wrapper"; diff --git a/paddle/fluid/eager/to_static/run_program_op_node.h 
b/paddle/fluid/eager/to_static/run_program_op_node.h index e5a2b04d5642b..fdcad4f439c3b 100644 --- a/paddle/fluid/eager/to_static/run_program_op_node.h +++ b/paddle/fluid/eager/to_static/run_program_op_node.h @@ -33,7 +33,7 @@ #include "paddle/pir/include/core/value.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif COMMON_DECLARE_bool(enable_pir_with_pt_in_dy2st); diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc index 4dc0db770727a..1659430d6216f 100644 --- a/paddle/fluid/eager/utils.cc +++ b/paddle/fluid/eager/utils.cc @@ -717,8 +717,8 @@ std::string EagerUtils::GradNodeStr(const egr::GradNodeBase& node) { in_slot_str += paddle::string::Sprintf(SLOT_INFO_TEMPLATE, i, sg_str, edges_str); } - std::string in_meta_str = - paddle::string::Sprintf(GRAD_SLOT_META_TEMPLATE, in_slot_str); + std::string in_meta_str = paddle::string::Sprintf( + GRAD_SLOT_META_TEMPLATE, in_metas.size(), in_slot_str); return paddle::string::Sprintf( GRAD_NODE_TEMPLATE, out_meta_str, in_meta_str); } else if (VLOG_IS_ON(5)) { diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 4dfd8312f6153..62459827d3c39 100755 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -153,10 +153,10 @@ if(WITH_XPU) target_link_libraries(var_type_traits dynload_xpti) endif() -# every source file that includes "dnnl.h" must depends on mkldnn -# or, the first one should depends on mkldnn -if(WITH_MKLDNN) - add_dependencies(var_type_traits mkldnn) +# every source file that includes "dnnl.h" must depends on onednn +# or, the first one should depends on onednn +if(WITH_ONEDNN) + add_dependencies(var_type_traits onednn) endif() set(BRPC_DEPS "") @@ -273,10 +273,10 @@ cc_library( SRCS shape_inference.cc DEPS phi common attribute selected_rows_utils) -# every source file that includes "dnnl.h" must depends on mkldnn -# or, the first one 
should depends on mkldnn -if(WITH_MKLDNN) - add_dependencies(shape_inference mkldnn) +# every source file that includes "dnnl.h" must depends on onednn +# or, the first one should depends on onednn +if(WITH_ONEDNN) + add_dependencies(shape_inference onednn) endif() cc_library( @@ -954,8 +954,8 @@ cc_library( DEPS common) target_link_libraries(type_info pir op_dialect) add_dependencies(type_info framework_proto auto_parallel_proto xxhash) -if(WITH_MKLDNN) - add_dependencies(type_info mkldnn) +if(WITH_ONEDNN) + add_dependencies(type_info onednn) endif() set(FLUID_FRAMEWORK_MODULES diff --git a/paddle/fluid/framework/data_transform.cc b/paddle/fluid/framework/data_transform.cc index 039ed3ffc2441..1a70dca1ff4f1 100644 --- a/paddle/fluid/framework/data_transform.cc +++ b/paddle/fluid/framework/data_transform.cc @@ -26,7 +26,7 @@ class Variable; } // namespace paddle #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif namespace paddle { diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index d771a12411adb..20c1444f238eb 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -54,8 +54,8 @@ set(op_handle_deps selected_rows_utils reference_count_pass_helper) -if(WITH_MKLDNN) - set(op_handle_deps ${op_handle_deps} mkldnn) +if(WITH_ONEDNN) + set(op_handle_deps ${op_handle_deps} onednn) endif() if(WITH_DGC) @@ -161,6 +161,6 @@ cc_library( SRCS build_strategy.cc DEPS pass_builder ${IR_PASS_DEPS}) -if(WITH_MKLDNN) - target_link_libraries(build_strategy mkldnn_placement_pass) +if(WITH_ONEDNN) + target_link_libraries(build_strategy onednn_placement_pass) endif() diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc index f49936bf44739..e44edfca1bdf0 100644 --- a/paddle/fluid/framework/details/build_strategy.cc +++ 
b/paddle/fluid/framework/details/build_strategy.cc @@ -78,7 +78,7 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder { AppendMultiDevPass(); AppendMultiGraphOptPasses(); - AppendPassToSetMkldnnAttr("mkldnn_placement_pass"); + AppendPassToSetMkldnnAttr("onednn_placement_pass"); // runtime_context_cache pass should be the last pass to enable the attr of // all original and fused operators. But no operators can be enabled this // attr if putting it after MultiDevPass. @@ -179,7 +179,7 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder { "delete_dropout_op_x_pass"); AppendPassWithCheck( strategy_.enable_inference_pass_ && strategy_.use_mkldnn_, - "mkldnn_placement_pass"); + "onednn_placement_pass"); // 2. trainning pass #ifdef PADDLE_WITH_CUDNN_FRONTEND @@ -480,7 +480,7 @@ ir::Graph *BuildStrategy::Apply(ir::Graph *graph, "GPU, skipped."; continue; } - } else if (pass->Type() == "mkldnn_placement_pass") { + } else if (pass->Type() == "onednn_placement_pass") { pass->Set("mkldnn_enabled_op_types", new std::unordered_set(mkldnn_enabled_op_types_)); } else if (pass->Type() == "backward_optimizer_op_deps_pass") { @@ -548,7 +548,7 @@ USE_PASS(build_cinn_pass); USE_PASS(fused_feedforward_pass); #endif #ifdef PADDLE_WITH_DNNL -USE_PASS(mkldnn_placement_pass); +USE_PASS(onednn_placement_pass); #endif #if (defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)) && \ !defined(_WIN32) && !defined(__APPLE__) diff --git a/paddle/fluid/framework/details/build_strategy.h b/paddle/fluid/framework/details/build_strategy.h index e954fd6a7a348..c0c7e6765b4dc 100644 --- a/paddle/fluid/framework/details/build_strategy.h +++ b/paddle/fluid/framework/details/build_strategy.h @@ -141,8 +141,8 @@ struct BuildStrategy { // Fuse ResUnit bool fuse_resunit_{false}; // mkldnn_enabled_op_types specify the operator type list to - // use MKLDNN acceleration. It is null in default, means - // that all the operators supported by MKLDNN will be + // use OneDNN acceleration. 
It is null in default, means + // that all the operators supported by OneDNN will be // accelerated. And it should not be set when // FLAGS_use_mkldnn=false std::unordered_set mkldnn_enabled_op_types_; diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index fbc2565e755fa..9d6ac59018856 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -26,7 +26,7 @@ limitations under the License. */ #include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler/event_tracing.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif #include "paddle/common/flags.h" #include "paddle/fluid/framework/executor_gc_helper.h" @@ -609,7 +609,7 @@ void Executor::EnableMKLDNN(const ProgramDesc& program) { } #else LOG(WARNING) - << "'MKLDNN' is not supported, Please re-compile with WITH_MKLDNN option"; + << "'MKLDNN' is not supported, Please re-compile with WITH_ONEDNN option"; #endif } } // namespace framework diff --git a/paddle/fluid/framework/fleet/heter_ps/feature_value.cu b/paddle/fluid/framework/fleet/heter_ps/feature_value.cu index 7ad502c89af92..58ab45db3e940 100644 --- a/paddle/fluid/framework/fleet/heter_ps/feature_value.cu +++ b/paddle/fluid/framework/fleet/heter_ps/feature_value.cu @@ -355,22 +355,26 @@ void AccessorWrapper::CopyForPushImpl( int64_t* gpu_len = reinterpret_cast(buf_length->ptr()); int* d_slot_vector = reinterpret_cast(buf_slot_vector->ptr()); int* d_mf_dim_vector = reinterpret_cast(buf_mf_dim_vector->ptr()); - cudaMemcpy(gpu_values, - grad_values.data(), - grad_values.size() * sizeof(float*), - cudaMemcpyHostToDevice); - cudaMemcpy(gpu_len, - slot_lengths_lod.data(), - slot_lengths.size() * sizeof(int64_t), - cudaMemcpyHostToDevice); - cudaMemcpy(d_slot_vector, - slot_vector.data(), - slot_lengths_lod.size() * sizeof(int), - cudaMemcpyHostToDevice); - cudaMemcpy(d_mf_dim_vector, - 
slot_mf_dim_vector.data(), - slot_lengths_lod.size() * sizeof(int), - cudaMemcpyHostToDevice); + cudaMemcpyAsync(gpu_values, + grad_values.data(), + grad_values.size() * sizeof(float*), + cudaMemcpyHostToDevice, + stream); + cudaMemcpyAsync(gpu_len, + slot_lengths_lod.data(), + slot_lengths.size() * sizeof(int64_t), + cudaMemcpyHostToDevice, + stream); + cudaMemcpyAsync(d_slot_vector, + slot_vector.data(), + slot_lengths_lod.size() * sizeof(int), + cudaMemcpyHostToDevice, + stream); + cudaMemcpyAsync(d_mf_dim_vector, + slot_mf_dim_vector.data(), + slot_lengths_lod.size() * sizeof(int), + cudaMemcpyHostToDevice, + stream); PushCopyWithPool<<<(total_length + 1024 - 1) / 1024, 1024, 0, stream>>>( total_grad_values_gpu, gpu_values, diff --git a/paddle/fluid/framework/fleet/heter_ps/heter_comm_inl.h b/paddle/fluid/framework/fleet/heter_ps/heter_comm_inl.h index 069dfeeec157b..49a1592348895 100644 --- a/paddle/fluid/framework/fleet/heter_ps/heter_comm_inl.h +++ b/paddle/fluid/framework/fleet/heter_ps/heter_comm_inl.h @@ -1631,6 +1631,7 @@ void HeterComm::pull_merge_sparse( val_type_size); } + AnyDeviceGuard guard2(dev_id); auto d_merged_vals = MemoryAlloc(place, uniq_len * val_type_size); auto d_merged_vals_ptr = reinterpret_cast(d_merged_vals->ptr()); heter_comm_kernel_->dy_mf_fill_dvals(d_shard_vals_ptr, diff --git a/paddle/fluid/framework/io/CMakeLists.txt b/paddle/fluid/framework/io/CMakeLists.txt index 82f879bce353b..8d55a10ee3310 100644 --- a/paddle/fluid/framework/io/CMakeLists.txt +++ b/paddle/fluid/framework/io/CMakeLists.txt @@ -14,6 +14,6 @@ cc_library( SRCS ${framework_io_srcs} DEPS ${framework_io_deps}) -if(WITH_MKLDNN) - add_dependencies(framework_io mkldnn) +if(WITH_ONEDNN) + add_dependencies(framework_io onednn) endif() diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index cb8093298d9bb..95c5d1ec796cc 100755 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ 
-174,42 +174,42 @@ if(WITH_GPU OR WITH_ROCM) pass_library(embedding_eltwise_layernorm_fuse_pass inference) endif() -if(WITH_MKLDNN) - pass_library(mkldnn_placement_pass base DEPS placement_pass_base DIR mkldnn) - pass_library(depthwise_conv_mkldnn_pass base DIR mkldnn) - pass_library(conv_affine_channel_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(conv_bias_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(conv_activation_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(conv_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(int8_scale_calculation_mkldnn_pass inference DIR mkldnn) - pass_library(params_quantization_mkldnn_pass inference DIR mkldnn) - pass_library(scale_matmul_fuse_pass inference DIR mkldnn) - pass_library(cpu_bfloat16_placement_pass inference DIR mkldnn) - pass_library(cpu_bfloat16_pass inference DIR mkldnn) - pass_library(fc_mkldnn_pass inference DIR mkldnn) - pass_library(interpolate_mkldnn_pass inference DIR mkldnn) - pass_library(softplus_activation_onednn_fuse_pass inference DIR mkldnn) - pass_library(shuffle_channel_mkldnn_detect_pass inference DIR mkldnn) - pass_library(fc_act_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(elementwise_act_onednn_fuse_pass inference DIR mkldnn) - pass_library(matmul_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(matmul_activation_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(operator_scale_onednn_fuse_pass inference DIR mkldnn) - pass_library(quant_transpose2_dequant_onednn_fuse_pass inference DIR mkldnn) - pass_library(squeeze2_transpose2_onednn_fuse_pass inference DIR mkldnn) - pass_library(operator_unsqueeze2_onednn_fuse_pass inference DIR mkldnn) - pass_library(operator_reshape2_onednn_fuse_pass inference DIR mkldnn) - pass_library(cpu_quantize_placement_pass base DIR mkldnn) - pass_library(cpu_quantize_pass inference DIR mkldnn) - pass_library(cpu_quantize_squash_pass inference DIR mkldnn) - 
pass_library(reshape_transpose_matmul_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(matmul_transpose_reshape_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(batch_norm_act_fuse_pass inference DIR mkldnn) - pass_library(multi_gru_fuse_pass inference DIR mkldnn) - pass_library(multi_gru_seq_fuse_pass inference DIR mkldnn) - pass_library(quant_dequant_mkldnn_pass inference DIR mkldnn) - pass_library(compute_propagate_scales_mkldnn_pass inference DIR mkldnn) - pass_library(self_attention_fuse_pass inference DIR mkldnn) +if(WITH_ONEDNN) + pass_library(onednn_placement_pass base DEPS placement_pass_base DIR onednn) + pass_library(depthwise_conv_onednn_pass base DIR onednn) + pass_library(conv_affine_channel_onednn_fuse_pass inference DIR onednn) + pass_library(conv_bias_onednn_fuse_pass inference DIR onednn) + pass_library(conv_activation_onednn_fuse_pass inference DIR onednn) + pass_library(conv_elementwise_add_onednn_fuse_pass inference DIR onednn) + pass_library(int8_scale_calculation_onednn_pass inference DIR onednn) + pass_library(params_quantization_onednn_pass inference DIR onednn) + pass_library(scale_matmul_fuse_pass inference DIR onednn) + pass_library(cpu_bfloat16_placement_pass inference DIR onednn) + pass_library(cpu_bfloat16_pass inference DIR onednn) + pass_library(fc_onednn_pass inference DIR onednn) + pass_library(interpolate_onednn_pass inference DIR onednn) + pass_library(softplus_activation_onednn_fuse_pass inference DIR onednn) + pass_library(shuffle_channel_onednn_detect_pass inference DIR onednn) + pass_library(fc_act_onednn_fuse_pass inference DIR onednn) + pass_library(elementwise_act_onednn_fuse_pass inference DIR onednn) + pass_library(matmul_elementwise_add_onednn_fuse_pass inference DIR onednn) + pass_library(matmul_activation_onednn_fuse_pass inference DIR onednn) + pass_library(operator_scale_onednn_fuse_pass inference DIR onednn) + pass_library(quant_transpose2_dequant_onednn_fuse_pass inference DIR onednn) + 
pass_library(squeeze2_transpose2_onednn_fuse_pass inference DIR onednn) + pass_library(operator_unsqueeze2_onednn_fuse_pass inference DIR onednn) + pass_library(operator_reshape2_onednn_fuse_pass inference DIR onednn) + pass_library(cpu_quantize_placement_pass base DIR onednn) + pass_library(cpu_quantize_pass inference DIR onednn) + pass_library(cpu_quantize_squash_pass inference DIR onednn) + pass_library(reshape_transpose_matmul_onednn_fuse_pass inference DIR onednn) + pass_library(matmul_transpose_reshape_onednn_fuse_pass inference DIR onednn) + pass_library(batch_norm_act_fuse_pass inference DIR onednn) + pass_library(multi_gru_fuse_pass inference DIR onednn) + pass_library(multi_gru_seq_fuse_pass inference DIR onednn) + pass_library(quant_dequant_onednn_pass inference DIR onednn) + pass_library(compute_propagate_scales_onednn_pass inference DIR onednn) + pass_library(self_attention_fuse_pass inference DIR onednn) if(WITH_AVX AND AVX512F_FOUND AND AVX512F_FLAG) @@ -274,6 +274,8 @@ if(WITH_XPU) ${XPU_PASS_DEPS}) pass_library(decoder_attention_xpu_fuse_pass inference DIR xpu DEPS ${XPU_PASS_DEPS}) + pass_library(cross_attention_xpu_fuse_pass inference DIR xpu DEPS + ${XPU_PASS_DEPS}) pass_library(multi_encoder_xpu_fuse_pass inference DIR xpu DEPS ${XPU_PASS_DEPS}) pass_library(multi_encoder_xpu_adaptive_seqlen_fuse_pass inference DIR xpu @@ -301,6 +303,8 @@ if(WITH_XPU) ${XPU_PASS_DEPS}) pass_library(add_layernorm_xpu_fuse_pass inference DIR xpu DEPS ${XPU_PASS_DEPS}) + pass_library(group_norm_silu_xpu_fuse_pass inference DIR xpu DEPS + ${XPU_PASS_DEPS}) pass_library(xpu_delete_cast_op_pass inference DIR xpu DEPS ${XPU_PASS_DEPS}) pass_library(fold_interp_outsize_fuse_pass inference DIR xpu DEPS ${XPU_PASS_DEPS}) @@ -324,6 +328,8 @@ if(WITH_XPU) pass_library(quant_dequant_xpu_pass inference DIR xpu DEPS ${XPU_PASS_DEPS}) pass_library(roformer_relative_pos_fuse_pass inference DIR xpu DEPS ${XPU_PASS_DEPS}) + pass_library(spatial_transformer_resblock_xpu_fuse_pass 
inference DIR xpu + DEPS ${XPU_PASS_DEPS}) endif() cc_library( @@ -536,19 +542,19 @@ if(NOT WIN32) SRCS dense_multihead_matmul_to_sparse_pass_tester.cc DEPS multihead_matmul_fuse_pass dense_multihead_matmul_to_sparse_pass) endif() -if(WITH_MKLDNN) +if(WITH_ONEDNN) cc_test( - test_depthwise_conv_mkldnn_pass - SRCS mkldnn/depthwise_conv_mkldnn_pass_tester.cc - DEPS depthwise_conv_mkldnn_pass) + test_depthwise_conv_onednn_pass + SRCS onednn/depthwise_conv_onednn_pass_tester.cc + DEPS depthwise_conv_onednn_pass) cc_test( - test_int8_scale_calculation_mkldnn_pass - SRCS mkldnn/int8_scale_calculation_mkldnn_pass_tester.cc - DEPS int8_scale_calculation_mkldnn_pass pass_test_util) + test_int8_scale_calculation_onednn_pass + SRCS onednn/int8_scale_calculation_onednn_pass_tester.cc + DEPS int8_scale_calculation_onednn_pass pass_test_util) cc_test( - test_params_quantization_mkldnn_pass - SRCS mkldnn/params_quantization_mkldnn_pass_tester.cc - DEPS params_quantization_mkldnn_pass) + test_params_quantization_onednn_pass + SRCS onednn/params_quantization_onednn_pass_tester.cc + DEPS params_quantization_onednn_pass) set(TEST_CONV_BN_PASS_DEPS conv_bn_fuse_pass graph_to_program_pass @@ -566,43 +572,43 @@ if(WITH_MKLDNN) set(TEST_CONV_BN_PASS_DEPS ${TEST_CONV_BN_PASS_DEPS} depthwise_conv) endif() cc_test( - test_mkldnn_placement_pass - SRCS mkldnn/mkldnn_placement_pass_tester.cc - DEPS mkldnn_placement_pass) + test_onednn_placement_pass + SRCS onednn/onednn_placement_pass_tester.cc + DEPS onednn_placement_pass) cc_test( - test_compute_propagate_scales_mkldnn_pass - SRCS mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc - DEPS compute_propagate_scales_mkldnn_pass naive_executor) + test_compute_propagate_scales_onednn_pass + SRCS onednn/compute_propagate_scales_onednn_pass_tester.cc + DEPS compute_propagate_scales_onednn_pass naive_executor) if(WITH_ONNXRUNTIME AND WIN32) # Copy onnxruntime for some c++ test in Windows, since the test will # be build only in CI, so suppose the 
generator in Windows is Ninja. - copy_onnx(test_compute_propagate_scales_mkldnn_pass) + copy_onnx(test_compute_propagate_scales_onednn_pass) endif() cc_test( test_cpu_quantize_placement_pass - SRCS mkldnn/cpu_quantize_placement_pass_tester.cc + SRCS onednn/cpu_quantize_placement_pass_tester.cc DEPS cpu_quantize_placement_pass) cc_test( test_cpu_quantize_pass - SRCS mkldnn/cpu_quantize_pass_tester.cc + SRCS onednn/cpu_quantize_pass_tester.cc DEPS cpu_quantize_pass naive_executor) cc_test( test_cpu_quantize_squash_pass - SRCS mkldnn/cpu_quantize_squash_pass_tester.cc + SRCS onednn/cpu_quantize_squash_pass_tester.cc DEPS cpu_quantize_squash_pass naive_executor) cc_test( - test_shuffle_channel_mkldnn_detect_pass - SRCS mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc - DEPS shuffle_channel_mkldnn_detect_pass) + test_shuffle_channel_onednn_detect_pass + SRCS onednn/shuffle_channel_onednn_detect_pass_tester.cc + DEPS shuffle_channel_onednn_detect_pass) cc_test( test_cpu_bfloat16_placement_pass - SRCS mkldnn/cpu_bfloat16_placement_pass_tester.cc + SRCS onednn/cpu_bfloat16_placement_pass_tester.cc DEPS cpu_bfloat16_placement_pass) cc_test( test_cpu_bfloat16_pass - SRCS mkldnn/cpu_bfloat16_pass_tester.cc + SRCS onednn/cpu_bfloat16_pass_tester.cc DEPS cpu_bfloat16_pass) endif() diff --git a/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc b/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc index f1657d4db5fdc..b91132784b95f 100644 --- a/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc +++ b/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc @@ -669,7 +669,8 @@ bool AutoMixedPrecisionPass::InputVarsNotConvert( if (std::find(vecs.begin(), vecs.end(), var_name) != vecs.end()) { return true; } - } else if (GetOpOriginalType(op_desc->Type()) == "instance_norm") { + } else if (GetOpOriginalType(op_desc->Type()) == "instance_norm" || + GetOpOriginalType(op_desc->Type()) == "layer_norm") { auto vecs = op_desc->Input("Bias"); if (std::find(vecs.begin(), 
vecs.end(), var_name) != vecs.end()) { return true; diff --git a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc index 4faebacb5f55c..947dc73333e0c 100644 --- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc @@ -18,7 +18,7 @@ #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/framework/tensor.h" diff --git a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc index 52ba852a730a5..cd823afa96dd4 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc @@ -16,6 +16,9 @@ #include #include "paddle/fluid/framework/ir/cutlass_teller.h" #include "paddle/fluid/framework/op_version_registry.h" +#ifdef PADDLE_WITH_CUDA +#include "paddle/fluid/platform/device/gpu/gpu_info.h" +#endif namespace paddle { namespace framework { @@ -194,7 +197,11 @@ void ConvElementwiseAdd2ActFusePass::ApplyImpl(ir::Graph* graph) const { auto new_op_proto = PrepareOpDesc( base_op_desc, bias_name, bias1_name, act_op_type, act_op_out); framework::OpDesc new_op_desc(new_op_proto, nullptr); - if (cutlass_can_fuse && cutlass_enable && is_fp16_precision) { + int sm = 0; +#ifdef PADDLE_WITH_CUDA + sm = platform::GetGPUComputeCapability(platform::GetCurrentDeviceId()); +#endif + if (cutlass_can_fuse && cutlass_enable && (is_fp16_precision || sm >= 80)) { new_op_desc.SetAttr("use_cudnn", false); } diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc index 
a560c0ab52e5a..0f5f2f7cc78b6 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc @@ -15,6 +15,9 @@ #include "paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h" #include "paddle/fluid/framework/ir/cutlass_teller.h" #include "paddle/fluid/framework/op_version_registry.h" +#ifdef PADDLE_WITH_CUDA +#include "paddle/fluid/platform/device/gpu/gpu_info.h" +#endif namespace paddle { namespace framework { @@ -215,7 +218,11 @@ void ConvElementwiseAddActFusePass::ApplyImpl(ir::Graph* graph) const { auto new_op_proto = PrepareOpDesc(base_op_desc, bias_name, act_op_type, act_op_out, alpha); framework::OpDesc new_op_desc(new_op_proto, nullptr); - if (cutlass_can_fuse && cutlass_enable && is_fp16_precision) { + int sm = 0; +#ifdef PADDLE_WITH_CUDA + sm = platform::GetGPUComputeCapability(platform::GetCurrentDeviceId()); +#endif + if (cutlass_can_fuse && cutlass_enable && (is_fp16_precision || sm >= 80)) { new_op_desc.SetAttr("use_cudnn", false); new_op_desc.Flush(); } diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc index a3defa9f3ed06..4a0dd02db0f24 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc @@ -15,6 +15,9 @@ #include "paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.h" #include "paddle/fluid/framework/ir/cutlass_teller.h" #include "paddle/fluid/framework/op_version_registry.h" +#ifdef PADDLE_WITH_CUDA +#include "paddle/fluid/platform/device/gpu/gpu_info.h" +#endif namespace paddle { namespace framework { @@ -121,14 +124,18 @@ void ConvElementwiseAddFusePass::ApplyImpl(ir::Graph* graph) const { static_cast(Get("model_precision")) == phi::DataType::FLOAT16 || Get("enable_gpu_mixed"); + bool cutlass_enable = Get("use_cutlass"); auto* scope = param_scope(); bool cutlass_can_fuse = 
CutlassTeller::Instance()->CbaCanSupport( conv_op->Op(), scope, act_type, Get("gpu_device_id")); - if (cutlass_can_fuse && cutlass_enable && is_fp16_precision) { + int sm = 0; +#ifdef PADDLE_WITH_CUDA + sm = platform::GetGPUComputeCapability(platform::GetCurrentDeviceId()); +#endif + if (cutlass_can_fuse && cutlass_enable && (is_fp16_precision || sm >= 80)) { new_op_desc.SetAttr("use_cudnn", false); } - auto* elementwise_add_op_desc = elementwise_add_op->Op(); auto out_threshold_attr = elementwise_add_op_desc->GetNullableAttr("out_threshold"); diff --git a/paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/activation_onednn_fuse_pass.h similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/activation_onednn_fuse_pass.h index 44631c54ef556..138d66731b54c 100644 --- a/paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h +++ b/paddle/fluid/framework/ir/onednn/activation_onednn_fuse_pass.h @@ -13,7 +13,7 @@ // limitations under the License. #pragma once -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_desc.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/batch_norm_act_fuse_pass.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/batch_norm_act_fuse_pass.cc index 230971a2dd286..788644dc85876 100644 --- a/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/batch_norm_act_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/batch_norm_act_fuse_pass.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.h b/paddle/fluid/framework/ir/onednn/batch_norm_act_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/batch_norm_act_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc b/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc rename to paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.cc index b94c99c92cdbb..1c733636ca7b0 100644 --- a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.h" #include @@ -487,8 +487,8 @@ void ComputePropagateScalesMkldnnPass::PropagateScales( } void ComputePropagateScalesMkldnnPass::ApplyImpl(ir::Graph* graph) const { - VLOG(3) << "Convert paddle model to mkldnn quantized model."; - const std::string pattern_name = "compute_propagate_scales_mkldnn_pass"; + VLOG(3) << "Convert paddle model to onednn quantized model."; + const std::string pattern_name = "compute_propagate_scales_onednn_pass"; FusePassBase::Init(pattern_name, graph); const std::unordered_set scale_immutable_ops = { @@ -520,10 +520,10 @@ void ComputePropagateScalesMkldnnPass::ApplyImpl(ir::Graph* graph) const { } // namespace framework } // namespace paddle -REGISTER_PASS(compute_propagate_scales_mkldnn_pass, +REGISTER_PASS(compute_propagate_scales_onednn_pass, paddle::framework::ir::ComputePropagateScalesMkldnnPass); -REGISTER_PASS_CAPABILITY(compute_propagate_scales_mkldnn_pass) +REGISTER_PASS_CAPABILITY(compute_propagate_scales_onednn_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .LE("conv2d", 1) diff --git a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h b/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.h similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h rename to paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.h index 2c2474438bedf..b63c74a884118 100644 --- a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h +++ b/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.h @@ -17,7 +17,7 @@ #include #include "paddle/fluid/framework/ir/fuse_pass_base.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" namespace 
paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc b/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass_tester.cc similarity index 99% rename from paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass_tester.cc index c09a2d1ffbb8d..9664647fd4214 100644 --- a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass_tester.cc @@ -15,7 +15,7 @@ #include #include -#include "paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.h" #include "paddle/fluid/framework/naive_executor.h" #include "paddle/phi/common/place.h" diff --git a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/conv_activation_onednn_fuse_pass.cc similarity index 94% rename from paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/conv_activation_onednn_fuse_pass.cc index 077a29d113bb7..61c0457f7c740 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/conv_activation_onednn_fuse_pass.cc @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/conv_activation_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/activation_onednn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/utils/string/pretty_log.h" @@ -42,18 +42,18 @@ void ConvActivationMkldnnFusePass::FuseConvAct(Graph* graph, std::string& act_type) const { PADDLE_ENFORCE_NOT_NULL( graph, phi::errors::InvalidArgument("Graph cannot be nullptr.")); - FusePassBase::Init(conv_type + "_" + act_type + "_mkldnn_fuse_pass", graph); + FusePassBase::Init(conv_type + "_" + act_type + "_onednn_fuse_pass", graph); GraphPatternDetector gpd; patterns::OperatorActivation conv_act_pattern(gpd.mutable_pattern(), - "conv_activation_mkldnn_fuse"); + "conv_activation_onednn_fuse"); conv_act_pattern(conv_type, act_type); int found_conv_activation_count = 0; auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* g) { if (!IsCompat(subgraph, g)) { - LOG(WARNING) << "conv_activation_mkldnn_fuse_pass op compat failed."; + LOG(WARNING) << "conv_activation_onednn_fuse_pass op compat failed."; return; } @@ -92,12 +92,12 @@ void ConvActivationMkldnnFusePass::FuseConvConcatAct( Graph* graph, std::string& act_type) const { PADDLE_ENFORCE_NOT_NULL( graph, phi::errors::InvalidArgument("Graph cannot be nullptr.")); - FusePassBase::Init("conv2d_concat_" + act_type + "_mkldnn_fuse_pass", graph); + FusePassBase::Init("conv2d_concat_" + act_type + "_onednn_fuse_pass", graph); GraphPatternDetector gpd; auto pattern = gpd.mutable_pattern(); patterns::OperatorActivation conv_concat_act( - pattern, "conv2d_concat_" + act_type + "_mkldnn_fuse_pass"); + pattern, "conv2d_concat_" + act_type + "_onednn_fuse_pass"); conv_concat_act("concat", act_type); int found_conv_concat_activation_count = 0; @@ -105,7 +105,7 @@ void 
ConvActivationMkldnnFusePass::FuseConvConcatAct( Graph* g) { if (!IsCompat(subgraph, g)) { LOG(WARNING) - << "conv_concat_activation_mkldnn_fuse_pass op compat failed."; + << "conv_concat_activation_onednn_fuse_pass op compat failed."; return; } @@ -377,10 +377,10 @@ ConvActivationMkldnnFusePass::ConvActivationMkldnnFusePass() { } // namespace framework } // namespace paddle -REGISTER_PASS(conv_activation_mkldnn_fuse_pass, +REGISTER_PASS(conv_activation_onednn_fuse_pass, paddle::framework::ir::ConvActivationMkldnnFusePass); -REGISTER_PASS_CAPABILITY(conv_activation_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(conv_activation_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .LE("conv2d", 1) diff --git a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/conv_activation_onednn_fuse_pass.h similarity index 94% rename from paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/conv_activation_onednn_fuse_pass.h index b50fa8997fdf8..9821421254c66 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h +++ b/paddle/fluid/framework/ir/onednn/conv_activation_onednn_fuse_pass.h @@ -33,9 +33,9 @@ class ConvActivationMkldnnFusePass : public FusePassBase { void FuseConvAct(Graph *graph, const std::string &conv_type, - std::string &act_type) const; + std::string &act_type) const; // NOLINT - void FuseConvConcatAct(Graph *graph, std::string &act_type) const; + void FuseConvConcatAct(Graph *graph, std::string &act_type) const; // NOLINT }; } // namespace ir diff --git a/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/conv_affine_channel_onednn_fuse_pass.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/conv_affine_channel_onednn_fuse_pass.cc 
index eedb5b3b60bd5..5ee6e361bcc92 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/conv_affine_channel_onednn_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/conv_affine_channel_onednn_fuse_pass.h" #include @@ -313,10 +313,10 @@ void ConvAffineChannelFusePass::FuseConvAffineChannel( } // namespace framework } // namespace paddle -REGISTER_PASS(conv_affine_channel_mkldnn_fuse_pass, +REGISTER_PASS(conv_affine_channel_onednn_fuse_pass, paddle::framework::ir::ConvAffineChannelFusePass); -REGISTER_PASS_CAPABILITY(conv_affine_channel_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(conv_affine_channel_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .LE("conv2d", 1) diff --git a/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/conv_affine_channel_onednn_fuse_pass.h similarity index 95% rename from paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/conv_affine_channel_onednn_fuse_pass.h index cc0a761c31ed2..49545ad565e52 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.h +++ b/paddle/fluid/framework/ir/onednn/conv_affine_channel_onednn_fuse_pass.h @@ -38,7 +38,7 @@ class ConvAffineChannelFusePass : public FusePassBase { void ApplyImpl(ir::Graph*) const override; void FuseConvAffineChannel(ir::Graph* graph, const std::string& conv_type) const; - const std::string name_scope_{"conv_affine_channel_mkldnn_fuse"}; + const std::string name_scope_{"conv_affine_channel_onednn_fuse"}; }; } // namespace ir diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc 
b/paddle/fluid/framework/ir/onednn/conv_bias_onednn_fuse_pass.cc similarity index 97% rename from paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/conv_bias_onednn_fuse_pass.cc index 0aa71c3df5fb5..1cf663d13deef 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/conv_bias_onednn_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/conv_bias_onednn_fuse_pass.h" #include #include @@ -448,21 +448,21 @@ void ConvBiasFusePass::FuseConvBias(ir::Graph* graph, } // namespace ir } // namespace framework } // namespace paddle -REGISTER_PASS(conv_bias_mkldnn_fuse_pass, +REGISTER_PASS(conv_bias_onednn_fuse_pass, paddle::framework::ir::ConvBiasFusePass); -REGISTER_PASS_CAPABILITY(conv_bias_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(conv_bias_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .LE("conv2d", 1) .LE("elementwise_add", 1)); -REGISTER_PASS(conv_transpose_bias_mkldnn_fuse_pass, +REGISTER_PASS(conv_transpose_bias_onednn_fuse_pass, paddle::framework::ir::Conv2DTransposeBiasFusePass); -REGISTER_PASS_CAPABILITY(conv_transpose_bias_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(conv_transpose_bias_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .LE("conv2d_transpose", 2) .LE("elementwise_add", 1)); -REGISTER_PASS(conv3d_bias_mkldnn_fuse_pass, +REGISTER_PASS(conv3d_bias_onednn_fuse_pass, paddle::framework::ir::Conv3DBiasFusePass); diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/conv_bias_onednn_fuse_pass.h similarity index 97% rename from paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h 
rename to paddle/fluid/framework/ir/onednn/conv_bias_onednn_fuse_pass.h index 4fb8418686299..f53cdf19d29f2 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h +++ b/paddle/fluid/framework/ir/onednn/conv_bias_onednn_fuse_pass.h @@ -40,7 +40,7 @@ class ConvBiasFusePass : public FusePassBase { const std::string& conv_type, const std::string& fused_conv) const; - const std::string name_scope_{"conv_bias_mkldnn_fuse"}; + const std::string name_scope_{"conv_bias_onednn_fuse"}; }; /* diff --git a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/conv_elementwise_add_onednn_fuse_pass.cc similarity index 95% rename from paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/conv_elementwise_add_onednn_fuse_pass.cc index fecf4a4eaf5f8..7733730f7d605 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/conv_elementwise_add_onednn_fuse_pass.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/conv_elementwise_add_onednn_fuse_pass.h" #include "paddle/fluid/framework/ir/graph_traits.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/utils/string/pretty_log.h" @@ -156,12 +156,12 @@ GraphWithStats ResidualConnectionMKLDNNFusePass::FuseConv( if (!IsCompat(subgraph, g)) { LOG(WARNING) - << "conv_elementwise_add_mkldnn_fuse_pass in op compat failed."; + << "conv_elementwise_add_onednn_fuse_pass in op compat failed."; return; } if (residual_data->Var()->GetShape() != conv_output->Var()->GetShape()) { - LOG(WARNING) << "conv_elementwise_add_mkldnn_fuse_pass doesn't support " - + LOG(WARNING) << "conv_elementwise_add_onednn_fuse_pass doesn't support " - "broadcasting"; return; } @@ -235,7 +235,7 @@ GraphWithStats ResidualConnectionMKLDNNFusePass::FuseProjectionConv( if (!IsCompat(subgraph, g)) { LOG(WARNING) - << "op compat for conv_elementwise_add_mkldnn_fuse_pass failed."; + << "op compat for conv_elementwise_add_onednn_fuse_pass failed."; return; } @@ -309,9 +309,9 @@ void ResidualConnectionMKLDNNFusePass::ApplyImpl(ir::Graph* graph) const { } // namespace framework } // namespace paddle -REGISTER_PASS(conv_elementwise_add_mkldnn_fuse_pass, +REGISTER_PASS(conv_elementwise_add_onednn_fuse_pass, paddle::framework::ir::ResidualConnectionMKLDNNFusePass); -REGISTER_PASS_CAPABILITY(conv_elementwise_add_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(conv_elementwise_add_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .LE("conv2d", 1) diff --git a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/conv_elementwise_add_onednn_fuse_pass.h similarity index 100% rename from 
paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/conv_elementwise_add_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass.cc similarity index 99% rename from paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc rename to paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass.cc index 528ba5747218a..1cebbfc1617a0 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass.cc @@ -9,7 +9,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h" +#include "paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass.h" #include #include diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h rename to paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass_tester.cc b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass_tester.cc similarity index 99% rename from paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass_tester.cc index 951d064364ce3..c31e59b39216a 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass_tester.cc @@ -14,7 +14,7 @@ #include -#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h" +#include "paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass.h" #include "paddle/fluid/imperative/type_defs.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc 
b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass.cc similarity index 97% rename from paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc rename to paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass.cc index 8741b00f689f5..a07887dafb276 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass.cc @@ -12,13 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h" +#include "paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass.h" #include #include #include "paddle/fluid/framework/ir/graph_pattern_detector.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #include "paddle/utils/string/pretty_log.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h rename to paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass_tester.cc b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass_tester.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass_tester.cc index c420c616a9ca6..e2de24cc398e0 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass_tester.cc @@ -14,8 +14,8 @@ #include -#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h" -#include 
"paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass.h" +#include "paddle/fluid/platform/onednn_helper.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/onednn/cpu_quantize_pass.cc similarity index 99% rename from paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc rename to paddle/fluid/framework/ir/onednn/cpu_quantize_pass.cc index 0e9c452455de3..a512f4b8021f4 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_quantize_pass.cc @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h" +#include "paddle/fluid/framework/ir/onednn/cpu_quantize_pass.h" #include #include #include -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" +#include "paddle/fluid/platform/onednn_helper.h" #include "paddle/utils/string/pretty_log.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h b/paddle/fluid/framework/ir/onednn/cpu_quantize_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h rename to paddle/fluid/framework/ir/onednn/cpu_quantize_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc b/paddle/fluid/framework/ir/onednn/cpu_quantize_pass_tester.cc similarity index 99% rename from paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/cpu_quantize_pass_tester.cc index c7e15e24216aa..3c1f4d8d60925 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_quantize_pass_tester.cc @@ -16,7 +16,7 @@ #include 
-#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h" // NOLINT +#include "paddle/fluid/framework/ir/onednn/cpu_quantize_pass.h" // NOLINT #include "paddle/fluid/framework/naive_executor.h" #include "paddle/fluid/imperative/type_defs.h" #include "paddle/phi/common/place.h" diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc b/paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass.cc similarity index 97% rename from paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc rename to paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass.cc index 2071f284126b7..56ba19a5cc22b 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass.cc @@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.h" +#include "paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass.h" #include -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.h b/paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.h rename to paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc b/paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass_tester.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass_tester.cc index 5cbd64c49d200..bd5db7c0e3df2 100644 
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass_tester.cc @@ -14,8 +14,8 @@ #include -#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass.h" +#include "paddle/fluid/platform/onednn_helper.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc b/paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass.cc similarity index 99% rename from paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc rename to paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass.cc index 578ab67f2a3b7..91f878a16abd0 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass.cc @@ -13,13 +13,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h" +#include "paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass.h" #include #include -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" +#include "paddle/fluid/platform/onednn_helper.h" #include "paddle/phi/core/enforce.h" #include "paddle/utils/string/pretty_log.h" diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h b/paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h rename to paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc b/paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass_tester.cc similarity index 99% rename from paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass_tester.cc index 7d4429a2eb7f2..fc57bdb6b52ef 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass_tester.cc @@ -14,7 +14,7 @@ #include -#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h" +#include "paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass.h" #include "paddle/fluid/framework/naive_executor.h" #include "paddle/phi/common/place.h" diff --git a/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.cc b/paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass.cc similarity index 90% rename from paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.cc rename to paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass.cc index fca71d0bd6900..703a2c685e770 100644 --- a/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.cc +++ 
b/paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" @@ -76,7 +76,7 @@ DepthwiseConvMKLDNNPass::DepthwiseConvMKLDNNPass() { // NOLINT void DepthwiseConvMKLDNNPass::ApplyImpl(ir::Graph* graph) const { PADDLE_ENFORCE_NOT_NULL( graph, platform::errors::InvalidArgument("Graph cannot be nullptr.")); - FusePassBase::Init("depthwise_conv_mkldnn_pass", graph); + FusePassBase::Init("depthwise_conv_onednn_pass", graph); GraphPatternDetector gpd; auto* pattern = gpd.mutable_pattern(); @@ -84,7 +84,7 @@ void DepthwiseConvMKLDNNPass::ApplyImpl(ir::Graph* graph) const { ->assert_is_op("depthwise_conv2d") ->assert_op_attr("use_mkldnn", true); - int found_depthwise_conv_mkldnn_count = 0; + int found_depthwise_conv_onednn_count = 0; auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* g) { if (!IsCompat(subgraph, g)) { @@ -94,20 +94,20 @@ void DepthwiseConvMKLDNNPass::ApplyImpl(ir::Graph* graph) const { VLOG(3) << "handle DepthwiseConvMKLDNN fuse"; GET_NODE(depthwise_conv, (*pattern)); depthwise_conv->Op()->SetType("conv2d"); - found_depthwise_conv_mkldnn_count++; + found_depthwise_conv_onednn_count++; }; gpd(graph, handler); - AddStatis(found_depthwise_conv_mkldnn_count); + AddStatis(found_depthwise_conv_onednn_count); } } // namespace ir } // namespace framework } // namespace paddle -REGISTER_PASS(depthwise_conv_mkldnn_pass, +REGISTER_PASS(depthwise_conv_onednn_pass, paddle::framework::ir::DepthwiseConvMKLDNNPass); -REGISTER_PASS_CAPABILITY(depthwise_conv_mkldnn_pass) 
+REGISTER_PASS_CAPABILITY(depthwise_conv_onednn_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination().LE( "depthwise_conv2d", 1)); diff --git a/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.h b/paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.h rename to paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass_tester.cc b/paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass_tester.cc similarity index 89% rename from paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass_tester.cc index f74e95fff10d8..5fdb7ad959921 100644 --- a/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass_tester.cc @@ -14,7 +14,7 @@ #include -#include "paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass.h" #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { @@ -105,7 +105,7 @@ ProgramDesc BuildProgramDesc() { TEST(DepthwiseConvMKLDNNPass, pass_op_version_check) { ASSERT_TRUE( paddle::framework::compatible::PassVersionCheckerRegistrar::GetInstance() - .IsPassCompatible("depthwise_conv_mkldnn_pass")); + .IsPassCompatible("depthwise_conv_onednn_pass")); } TEST(DepthwiseConvMKLDNNPass, basic) { @@ -113,12 +113,12 @@ TEST(DepthwiseConvMKLDNNPass, basic) { std::unique_ptr graph(new ir::Graph(prog)); - auto pass = PassRegistry::Instance().Get("depthwise_conv_mkldnn_pass"); + auto pass = PassRegistry::Instance().Get("depthwise_conv_onednn_pass"); struct counters { - int mkldnn_depthwise_conv_nodes; + int onednn_depthwise_conv_nodes; int other_depthwise_conv_nodes; - int mkldnn_conv_nodes; + int 
onednn_conv_nodes; int other_conv_nodes; }; @@ -134,12 +134,12 @@ TEST(DepthwiseConvMKLDNNPass, basic) { auto* op = node->Op(); if (op->Type() == "conv2d") { if (PADDLE_GET_CONST(bool, op->GetAttr("use_mkldnn"))) - after.mkldnn_conv_nodes++; + after.onednn_conv_nodes++; else after.other_conv_nodes++; } else if (op->Type() == "depthwise_conv2d") { if (PADDLE_GET_CONST(bool, op->GetAttr("use_mkldnn"))) - after.mkldnn_depthwise_conv_nodes++; + after.onednn_depthwise_conv_nodes++; else after.other_depthwise_conv_nodes++; } @@ -149,13 +149,13 @@ TEST(DepthwiseConvMKLDNNPass, basic) { EXPECT_EQ(after.other_depthwise_conv_nodes, before.other_depthwise_conv_nodes); EXPECT_EQ(after.other_conv_nodes, before.other_conv_nodes); - EXPECT_EQ(after.mkldnn_depthwise_conv_nodes, - before.mkldnn_depthwise_conv_nodes - 1); - EXPECT_EQ(after.mkldnn_conv_nodes, before.mkldnn_conv_nodes + 1); + EXPECT_EQ(after.onednn_depthwise_conv_nodes, + before.onednn_depthwise_conv_nodes - 1); + EXPECT_EQ(after.onednn_conv_nodes, before.onednn_conv_nodes + 1); } } // namespace ir } // namespace framework } // namespace paddle -USE_PASS(depthwise_conv_mkldnn_pass); +USE_PASS(depthwise_conv_onednn_pass); diff --git a/paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/elementwise_act_onednn_fuse_pass.cc similarity index 95% rename from paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/elementwise_act_onednn_fuse_pass.cc index b6e84145aebff..3f0423870d366 100644 --- a/paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/elementwise_act_onednn_fuse_pass.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/elementwise_act_onednn_fuse_pass.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" -#include "paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/activation_onednn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/phi/core/enforce.h" #include "paddle/utils/string/pretty_log.h" @@ -43,7 +43,7 @@ void ElementwiseActivationOneDNNPass::FuseElementwiseAct( const std::string &act_type) const { PADDLE_ENFORCE_NOT_NULL( graph, phi::errors::InvalidArgument("Graph cannot be nullptr.")); - FusePassBase::Init(elt_type + "_" + act_type + "_mkldnn_fuse_pass", graph); + FusePassBase::Init(elt_type + "_" + act_type + "_onednn_fuse_pass", graph); GraphPatternDetector gpd; patterns::OperatorActivation elementwise_act_pattern(gpd.mutable_pattern(), diff --git a/paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/elementwise_act_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/elementwise_act_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/fc_act_onednn_fuse_pass.cc similarity index 89% rename from paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/fc_act_onednn_fuse_pass.cc index 47c76289d187c..aa4ee8cb5e767 100644 --- a/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/fc_act_onednn_fuse_pass.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/fc_act_onednn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/activation_onednn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/utils/string/pretty_log.h" @@ -34,11 +34,11 @@ void FuseFCActOneDNNPass::FuseFCAct(Graph *graph, const std::string &act_type) const { PADDLE_ENFORCE_NOT_NULL( graph, phi::errors::InvalidArgument("Graph cannot be nullptr.")); - FusePassBase::Init("fc_" + act_type + "_mkldnn_fuse_pass", graph); + FusePassBase::Init("fc_" + act_type + "_onednn_fuse_pass", graph); GraphPatternDetector gpd; patterns::OperatorActivation fc_act_pattern( - gpd.mutable_pattern(), "fc_" + act_type + "_mkldnn_fuse_pass"); + gpd.mutable_pattern(), "fc_" + act_type + "_onednn_fuse_pass"); fc_act_pattern("fc", act_type); int found_fc_act_count = 0; @@ -70,9 +70,9 @@ void FuseFCActOneDNNPass::FuseFCAct(Graph *graph, } // namespace framework } // namespace paddle -REGISTER_PASS(fc_act_mkldnn_fuse_pass, +REGISTER_PASS(fc_act_onednn_fuse_pass, paddle::framework::ir::FuseFCActOneDNNPass); -REGISTER_PASS_CAPABILITY(fc_act_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(fc_act_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .LE("fc", 0) diff --git a/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/fc_act_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/fc_act_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc b/paddle/fluid/framework/ir/onednn/fc_onednn_pass.cc similarity index 93% rename from paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc rename to paddle/fluid/framework/ir/onednn/fc_onednn_pass.cc index 
f4396d6d8175a..082579428a01a 100644 --- a/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/onednn/fc_onednn_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/fc_onednn_pass.h" #include "paddle/phi/core/enforce.h" #include "paddle/utils/string/pretty_log.h" @@ -33,10 +33,10 @@ void FCMKLDNNPass::ApplyImpl(ir::Graph* graph) const { PADDLE_ENFORCE_NOT_NULL(graph, platform::errors::InvalidArgument( "Pointer to graph argument should not be NULL.")); - Init("fc_mkldnn_pass", graph); + Init("fc_onednn_pass", graph); GraphPatternDetector gpd; - patterns::FCMKLDNN fc_pattern(gpd.mutable_pattern(), "fc_mkldnn_pass"); + patterns::FCMKLDNN fc_pattern(gpd.mutable_pattern(), "fc_onednn_pass"); // searching for fc+residual doesn't make sense at this stage fc_pattern(false /*with_residual*/); @@ -89,4 +89,4 @@ void FCMKLDNNPass::ApplyImpl(ir::Graph* graph) const { } // namespace framework } // namespace paddle -REGISTER_PASS(fc_mkldnn_pass, paddle::framework::ir::FCMKLDNNPass); +REGISTER_PASS(fc_onednn_pass, paddle::framework::ir::FCMKLDNNPass); diff --git a/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.h b/paddle/fluid/framework/ir/onednn/fc_onednn_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.h rename to paddle/fluid/framework/ir/onednn/fc_onednn_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.cc b/paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass.cc similarity index 94% rename from paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.cc rename to paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass.cc index a219e47072782..499a7734d71d6 100644 --- a/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.cc +++ 
b/paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass.cc @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #include "paddle/phi/core/enforce.h" namespace paddle { @@ -110,10 +110,10 @@ void Int8ScaleCalculationMkldnnPass::Int8ScaleImpl( PADDLE_ENFORCE_NOT_NULL(graph, platform::errors::InvalidArgument( "Pointer to graph argument should not be NULL.")); - FusePassBase::Init("int8_scale_calculation_mkldnn_pass", graph); + FusePassBase::Init("int8_scale_calculation_onednn_pass", graph); GraphPatternDetector gpd; patterns::Conv conv_pattern(gpd.mutable_pattern(), - "int8_scale_calculation_mkldnn_pass"); + "int8_scale_calculation_onednn_pass"); conv_pattern(conv_type); int found_int8_scales_count = 0; @@ -214,9 +214,9 @@ void Int8ScaleCalculationMkldnnPass::Int8ScaleImpl( } // namespace framework } // namespace paddle -REGISTER_PASS(int8_scale_calculation_mkldnn_pass, +REGISTER_PASS(int8_scale_calculation_onednn_pass, paddle::framework::ir::Int8ScaleCalculationMkldnnPass); -REGISTER_PASS_CAPABILITY(int8_scale_calculation_mkldnn_pass) +REGISTER_PASS_CAPABILITY(int8_scale_calculation_onednn_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination().LE( "conv2d", 1)); diff --git a/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.h b/paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass.h similarity index 100% rename from 
paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.h rename to paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass_tester.cc b/paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass_tester.cc similarity index 96% rename from paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass_tester.cc index fde7fb07b9108..e015276ac1f67 100644 --- a/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass_tester.cc @@ -14,7 +14,7 @@ #include -#include "paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass.h" namespace paddle { namespace framework { @@ -95,7 +95,7 @@ void MainTest(bool convWithExistingBias, auto prog = BuildProgramDesc(convWithExistingBias, scale_weights); std::unique_ptr graph(new ir::Graph(prog)); auto pass = - PassRegistry::Instance().Get("int8_scale_calculation_mkldnn_pass"); + PassRegistry::Instance().Get("int8_scale_calculation_onednn_pass"); int original_nodes_num = graph->Nodes().size(); graph.reset(pass->Apply(graph.release())); int current_nodes_num = graph->Nodes().size(); @@ -153,4 +153,4 @@ TEST(Int8ScaleCalculationMkldnnPass, } // namespace framework } // namespace paddle -USE_PASS(int8_scale_calculation_mkldnn_pass); +USE_PASS(int8_scale_calculation_onednn_pass); diff --git a/paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.cc b/paddle/fluid/framework/ir/onednn/interpolate_onednn_pass.cc similarity index 90% rename from paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.cc rename to paddle/fluid/framework/ir/onednn/interpolate_onednn_pass.cc index 04a6f8d6b770d..8f384931a589c 100644 --- 
a/paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/onednn/interpolate_onednn_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/interpolate_onednn_pass.h" #include #include @@ -36,12 +36,12 @@ void InterpolateOneDNNPass::ApplyImpl(ir::Graph* graph) const { platform::errors::InvalidArgument( "Pointer to graph argument should not be NULL.")); if (!(graph->Has("use_mkldnn") && graph->Get("use_mkldnn"))) { - VLOG(3) << "Do not handle interpolate_mkldnn_pass"; + VLOG(3) << "Do not handle interpolate_onednn_pass"; return; } - VLOG(4) << "Handle interpolate_mkldnn_pass"; + VLOG(4) << "Handle interpolate_onednn_pass"; - Init("interpolate_mkldnn_pass", graph); + Init("interpolate_onednn_pass", graph); int found_count = 0; const std::vector interpolate_op_types = {"bilinear_interp", @@ -69,5 +69,5 @@ void InterpolateOneDNNPass::ApplyImpl(ir::Graph* graph) const { } // namespace framework } // namespace paddle -REGISTER_PASS(interpolate_mkldnn_pass, +REGISTER_PASS(interpolate_onednn_pass, paddle::framework::ir::InterpolateOneDNNPass); diff --git a/paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.h b/paddle/fluid/framework/ir/onednn/interpolate_onednn_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.h rename to paddle/fluid/framework/ir/onednn/interpolate_onednn_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/matmul_activation_onednn_fuse_pass.cc similarity index 93% rename from paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/matmul_activation_onednn_fuse_pass.cc index d547f6fdd1ba2..66c96c268141d 100644 --- 
a/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/matmul_activation_onednn_fuse_pass.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/matmul_activation_onednn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/activation_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/utils/string/pretty_log.h" @@ -39,11 +39,11 @@ void MatmulActivationMkldnnFusePass::FuseMatmulAct( Graph* graph, const std::string& matmul_type, std::string& act_type) const { PADDLE_ENFORCE_NOT_NULL( graph, phi::errors::InvalidArgument("Graph cannot be nullptr.")); - FusePassBase::Init(matmul_type + "_" + act_type + "_mkldnn_fuse_pass", graph); + FusePassBase::Init(matmul_type + "_" + act_type + "_onednn_fuse_pass", graph); GraphPatternDetector gpd; patterns::OperatorActivation matmul_act_pattern( - gpd.mutable_pattern(), "matmul_activation_mkldnn_fuse"); + gpd.mutable_pattern(), "matmul_activation_onednn_fuse"); matmul_act_pattern(matmul_type, act_type); int found_matmul_activation_count = 0; @@ -52,7 +52,7 @@ void MatmulActivationMkldnnFusePass::FuseMatmulAct( VLOG(4) << "handle " + matmul_type + "+" + act_type + " fuse"; if (!IsCompat(subgraph, g)) { - LOG(WARNING) << "matmul_activation_mkldnn_fuse_pass op compat failed."; + LOG(WARNING) << "matmul_activation_onednn_fuse_pass op compat failed."; return; } @@ -288,10 +288,10 @@ MatmulActivationMkldnnFusePass::MatmulActivationMkldnnFusePass() { } // namespace framework } // namespace paddle 
-REGISTER_PASS(matmul_activation_mkldnn_fuse_pass, +REGISTER_PASS(matmul_activation_onednn_fuse_pass, paddle::framework::ir::MatmulActivationMkldnnFusePass); -REGISTER_PASS_CAPABILITY(matmul_activation_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(matmul_activation_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .EQ("fused_matmul", 0) diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/matmul_activation_onednn_fuse_pass.h similarity index 95% rename from paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/matmul_activation_onednn_fuse_pass.h index ebef63e292438..eec62d9e066fa 100644 --- a/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.h +++ b/paddle/fluid/framework/ir/onednn/matmul_activation_onednn_fuse_pass.h @@ -33,7 +33,7 @@ class MatmulActivationMkldnnFusePass : public FusePassBase { void FuseMatmulAct(Graph *graph, const std::string &matmul_type, - std::string &act_type) const; + std::string &act_type) const; // NOLINT }; } // namespace ir diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/matmul_elementwise_add_onednn_fuse_pass.cc similarity index 93% rename from paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/matmul_elementwise_add_onednn_fuse_pass.cc index 5bb153d3ece0b..8d80eb57e5032 100644 --- a/paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/matmul_elementwise_add_onednn_fuse_pass.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/matmul_elementwise_add_onednn_fuse_pass.h" #include "paddle/fluid/framework/ir/graph_traits.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/utils/string/pretty_log.h" @@ -62,7 +62,7 @@ void MatmulElementwiseAddMKLDNNFusePass::FuseMatmulElementwiseAdd( if (FindFuseOption(*matmul, *elementwise_add) != FUSE_MKLDNN) return; if (!IsCompat(subgraph, g)) { LOG(WARNING) - << "op compat for matmul_elementwise_add_mkldnn_fuse_pass failed."; + << "op compat for matmul_elementwise_add_onednn_fuse_pass failed."; return; } @@ -167,9 +167,9 @@ MatmulElementwiseAddMKLDNNFusePass::MatmulElementwiseAddMKLDNNFusePass() { } // namespace framework } // namespace paddle -REGISTER_PASS(matmul_elementwise_add_mkldnn_fuse_pass, +REGISTER_PASS(matmul_elementwise_add_onednn_fuse_pass, paddle::framework::ir::MatmulElementwiseAddMKLDNNFusePass); -REGISTER_PASS_CAPABILITY(matmul_elementwise_add_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(matmul_elementwise_add_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .EQ("fused_matmul", 0) diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/matmul_elementwise_add_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/matmul_elementwise_add_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/matmul_transpose_reshape_onednn_fuse_pass.cc similarity index 94% rename from paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.cc 
rename to paddle/fluid/framework/ir/onednn/matmul_transpose_reshape_onednn_fuse_pass.cc index a899744672b4b..0b742d763bebc 100644 --- a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/matmul_transpose_reshape_onednn_fuse_pass.cc @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/matmul_transpose_reshape_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/phi/core/enforce.h" #include "paddle/utils/string/pretty_log.h" @@ -37,12 +37,12 @@ void MatmulTransposeReshapeMKLDNNPass::Fuse( PADDLE_ENFORCE_NOT_NULL(graph, platform::errors::InvalidArgument( "Pointer to graph argument should not be NULL.")); - FusePassBase::Init(matmul_type + "_transpose_reshape_mkldnn_fuse_pass", + FusePassBase::Init(matmul_type + "_transpose_reshape_onednn_fuse_pass", graph); GraphPatternDetector gpd; patterns::MatmulTransposeReshapePattern mtrp( gpd.mutable_pattern(), - matmul_type + "_transpose_reshape_mkldnn_fuse_pass"); + matmul_type + "_transpose_reshape_onednn_fuse_pass"); mtrp(matmul_type); int found_matmul_transpose_reshape_count = 0; @@ -206,10 +206,10 @@ MatmulTransposeReshapeMKLDNNPass::MatmulTransposeReshapeMKLDNNPass() { } // namespace framework } // namespace paddle -REGISTER_PASS(matmul_transpose_reshape_mkldnn_fuse_pass, +REGISTER_PASS(matmul_transpose_reshape_onednn_fuse_pass, paddle::framework::ir::MatmulTransposeReshapeMKLDNNPass); -REGISTER_PASS_CAPABILITY(matmul_transpose_reshape_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(matmul_transpose_reshape_onednn_fuse_pass) .AddCombination( 
paddle::framework::compatible::OpVersionComparatorCombination() .EQ("fused_matmul", 0) diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/matmul_transpose_reshape_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/matmul_transpose_reshape_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/multi_gru_fuse_pass.cc similarity index 99% rename from paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/multi_gru_fuse_pass.cc index a1f74d3423006..3b95f27a2d302 100644 --- a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/multi_gru_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/multi_gru_fuse_pass.h" #include diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.h b/paddle/fluid/framework/ir/onednn/multi_gru_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/multi_gru_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/multi_gru_seq_fuse_pass.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/multi_gru_seq_fuse_pass.cc index 7af7b67c4da49..214b8e12fd0b1 100644 --- a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/multi_gru_seq_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/multi_gru_seq_fuse_pass.h" #include #include @@ -23,7 +23,7 @@ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #include "paddle/utils/string/pretty_log.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.h b/paddle/fluid/framework/ir/onednn/multi_gru_seq_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/multi_gru_seq_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h b/paddle/fluid/framework/ir/onednn/onednn_pass_util.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h rename to paddle/fluid/framework/ir/onednn/onednn_pass_util.h diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.cc b/paddle/fluid/framework/ir/onednn/onednn_placement_pass.cc similarity index 95% rename from paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.cc rename to paddle/fluid/framework/ir/onednn/onednn_placement_pass.cc index 23e5497b12fde..7ff379f5e9120 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.cc +++ b/paddle/fluid/framework/ir/onednn/onednn_placement_pass.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h" +#include "paddle/fluid/framework/ir/onednn/onednn_placement_pass.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/framework/operator.h" @@ -94,10 +94,10 @@ bool MKLDNNPlacementPass::IsSupport(const Node* op) const { } // namespace framework } // namespace paddle -REGISTER_PASS(mkldnn_placement_pass, paddle::framework::ir::MKLDNNPlacementPass) +REGISTER_PASS(onednn_placement_pass, paddle::framework::ir::MKLDNNPlacementPass) .RequirePassAttr("mkldnn_enabled_op_types"); -REGISTER_PASS_CAPABILITY(mkldnn_placement_pass) +REGISTER_PASS_CAPABILITY(onednn_placement_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination().LE( "fusion_gru", 1)); diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h b/paddle/fluid/framework/ir/onednn/onednn_placement_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h rename to paddle/fluid/framework/ir/onednn/onednn_placement_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass_tester.cc b/paddle/fluid/framework/ir/onednn/onednn_placement_pass_tester.cc similarity index 96% rename from paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/onednn_placement_pass_tester.cc index b7697252a67c4..052c59ef84a99 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/onednn_placement_pass_tester.cc @@ -14,7 +14,7 @@ #include -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h" +#include "paddle/fluid/framework/ir/onednn/onednn_placement_pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/utils/tribool.h" @@ -133,7 +133,7 @@ class PlacementPassTest { RegisterOpKernel({"conv2d", "pool2d", "concat", "relu"}); std::unique_ptr graph(new ir::Graph(prog)); - auto pass = 
PassRegistry::Instance().Get("mkldnn_placement_pass"); + auto pass = PassRegistry::Instance().Get("onednn_placement_pass"); pass->Set("mkldnn_enabled_op_types", new std::unordered_set(mkldnn_enabled_op_types)); @@ -156,7 +156,7 @@ class PlacementPassTest { } void PlacementNameTest() { - auto pass = PassRegistry::Instance().Get("mkldnn_placement_pass"); + auto pass = PassRegistry::Instance().Get("onednn_placement_pass"); EXPECT_EQ(static_cast(pass.get())->GetPlacementName(), "MKLDNN"); } @@ -186,4 +186,4 @@ TEST(MKLDNNPlacementPass, placement_name) { } // namespace framework } // namespace paddle -USE_PASS(mkldnn_placement_pass); +USE_PASS(onednn_placement_pass); diff --git a/paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/operator_reshape2_onednn_fuse_pass.cc similarity index 97% rename from paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/operator_reshape2_onednn_fuse_pass.cc index b128159237546..a21ddd579be3c 100644 --- a/paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/operator_reshape2_onednn_fuse_pass.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/operator_reshape2_onednn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/utils/string/pretty_log.h" diff --git a/paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/operator_reshape2_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/operator_reshape2_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/operator_scale_onednn_fuse_pass.cc similarity index 97% rename from paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/operator_scale_onednn_fuse_pass.cc index 4f6c2bfe0507b..2910849af5f8d 100644 --- a/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/operator_scale_onednn_fuse_pass.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/operator_scale_onednn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/utils/string/pretty_log.h" diff --git a/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/operator_scale_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/operator_scale_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/operator_unsqueeze2_onednn_fuse_pass.cc similarity index 97% rename from paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/operator_unsqueeze2_onednn_fuse_pass.cc index 28b01bc065b37..2aea55f473fd4 100644 --- a/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/operator_unsqueeze2_onednn_fuse_pass.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/operator_unsqueeze2_onednn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/utils/string/pretty_log.h" diff --git a/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/operator_unsqueeze2_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/operator_unsqueeze2_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.cc b/paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass.cc similarity index 95% rename from paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.cc rename to paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass.cc index 11eba402b55d4..2255458535071 100644 --- a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #include "paddle/utils/string/pretty_log.h" namespace paddle { @@ -131,7 +131,7 @@ void ParamsQuantizationMkldnnPass::QuantizeConv(ir::Graph* graph, LOG(WARNING) << "Pass in op compat failed."; return; } - VLOG(4) << "handle convolution in params_quantization_mkldnn_pass"; + VLOG(4) << "handle convolution in params_quantization_onednn_pass"; GET_IR_NODE_FROM_SUBGRAPH(conv_op, conv_op, conv_pattern); GET_IR_NODE_FROM_SUBGRAPH(conv_input, conv_input, conv_pattern); @@ -179,9 +179,9 @@ void ParamsQuantizationMkldnnPass::ApplyImpl(ir::Graph* graph) const { } // namespace framework } // namespace paddle -REGISTER_PASS(params_quantization_mkldnn_pass, +REGISTER_PASS(params_quantization_onednn_pass, paddle::framework::ir::ParamsQuantizationMkldnnPass); -REGISTER_PASS_CAPABILITY(params_quantization_mkldnn_pass) +REGISTER_PASS_CAPABILITY(params_quantization_onednn_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination().LE( "conv2d", 1)); diff --git a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.h b/paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass.h similarity index 95% rename from paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.h rename to paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass.h index e681d9701b8d8..c8bf17cb081ec 100644 --- a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.h +++ b/paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass.h @@ -37,7 +37,7 @@ class ParamsQuantizationMkldnnPass : public FusePassBase { bool with_residual_connection) const; private: - const std::string name_scope_ = "params_quantization_mkldnn_pass"; + 
const std::string name_scope_ = "params_quantization_onednn_pass"; }; } // namespace ir diff --git a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass_tester.cc b/paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass_tester.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass_tester.cc index bad1f4597f4a2..36ff2110e582f 100755 --- a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass_tester.cc @@ -14,7 +14,7 @@ #include -#include "paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.h" // NOLINT +#include "paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass.h" // NOLINT #include "paddle/fluid/imperative/type_defs.h" #include "paddle/phi/common/place.h" @@ -245,7 +245,7 @@ struct ParamsQuantizationMkldnnPassTestFixture : public ::testing::Test { void RunPassTest(std::unique_ptr program) { auto graph = program->CreateGraph(); - auto pass = PassRegistry::Instance().Get("params_quantization_mkldnn_pass"); + auto pass = PassRegistry::Instance().Get("params_quantization_onednn_pass"); graph.reset(pass->Apply(graph.release())); program->CheckGraph(graph); @@ -384,4 +384,4 @@ TEST_F(ParamsQuantizationMkldnnPassTestFixture, conv_with_bias_2g2o2i1h1ws) { } // namespace framework } // namespace paddle -USE_PASS(params_quantization_mkldnn_pass); +USE_PASS(params_quantization_onednn_pass); diff --git a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc b/paddle/fluid/framework/ir/onednn/quant_dequant_onednn_pass.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc rename to paddle/fluid/framework/ir/onednn/quant_dequant_onednn_pass.cc index 734915b0dfe95..6ffd3963504f2 100644 --- a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc +++ 
b/paddle/fluid/framework/ir/onednn/quant_dequant_onednn_pass.cc @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/quant_dequant_onednn_pass.h" #include #include "paddle/fluid/framework/ir/graph_helper.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { @@ -703,8 +703,8 @@ void QuantDequantMkldnnPass::RemoveCtrlVars(ir::Graph* graph) const { } void QuantDequantMkldnnPass::ApplyImpl(ir::Graph* graph) const { - VLOG(3) << "Convert paddle slim quantized model to mkldnn quantized model."; - const std::string pattern_name = "quant_dequant_mkldnn_pass"; + VLOG(3) << "Convert paddle slim quantized model to onednn quantized model."; + const std::string pattern_name = "quant_dequant_onednn_pass"; FusePassBase::Init(pattern_name, graph); const std::unordered_set skip_ops = {"conv2d", @@ -753,7 +753,7 @@ void QuantDequantMkldnnPass::ApplyImpl(ir::Graph* graph) const { RemoveCtrlVars(graph); // save var_quant_scales in the temporary save op's attr - // for compute_propagate_scales_mkldnn_pass + // for compute_propagate_scales_onednn_pass SaveInfoInTheTmpOp( graph, "has_quant_info", "var_quant_scales", var_quant_scales); } @@ -762,10 +762,10 @@ void QuantDequantMkldnnPass::ApplyImpl(ir::Graph* graph) const { } // namespace framework } // namespace paddle -REGISTER_PASS(quant_dequant_mkldnn_pass, +REGISTER_PASS(quant_dequant_onednn_pass, paddle::framework::ir::QuantDequantMkldnnPass); -REGISTER_PASS_CAPABILITY(quant_dequant_mkldnn_pass) +REGISTER_PASS_CAPABILITY(quant_dequant_onednn_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .LE("conv2d", 1) diff --git 
a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.h b/paddle/fluid/framework/ir/onednn/quant_dequant_onednn_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.h rename to paddle/fluid/framework/ir/onednn/quant_dequant_onednn_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/quant_transpose2_dequant_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/quant_transpose2_dequant_onednn_fuse_pass.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/quant_transpose2_dequant_onednn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/quant_transpose2_dequant_onednn_fuse_pass.cc index 5d5edb83a9134..37dfec26b36f2 100644 --- a/paddle/fluid/framework/ir/mkldnn/quant_transpose2_dequant_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/quant_transpose2_dequant_onednn_fuse_pass.cc @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/quant_transpose2_dequant_onednn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/quant_transpose2_dequant_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/utils/string/pretty_log.h" diff --git a/paddle/fluid/framework/ir/mkldnn/quant_transpose2_dequant_onednn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/quant_transpose2_dequant_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/quant_transpose2_dequant_onednn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/quant_transpose2_dequant_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/reshape_transpose_matmul_onednn_fuse_pass.cc similarity index 95% rename from 
paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/reshape_transpose_matmul_onednn_fuse_pass.cc index 07675a3f4efeb..f3250c32604c6 100644 --- a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/reshape_transpose_matmul_onednn_fuse_pass.cc @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/reshape_transpose_matmul_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/phi/core/enforce.h" #include "paddle/utils/string/pretty_log.h" @@ -53,13 +53,13 @@ void ReshapeTransposeMatmulMkldnnFusePass::Fuse( PADDLE_ENFORCE_NOT_NULL(graph, platform::errors::InvalidArgument( "Pointer to graph argument should not be NULL.")); - FusePassBase::Init("reshape_transpose_" + matmul_type + "_mkldnn_fuse_pass", + FusePassBase::Init("reshape_transpose_" + matmul_type + "_onednn_fuse_pass", graph); GraphPatternDetector gpd; patterns::ReshapeTransposeMatmulPattern rtm_pattern( gpd.mutable_pattern(), - "reshape_transpose_" + matmul_type + "_mkldnn_fuse_pass"); + "reshape_transpose_" + matmul_type + "_onednn_fuse_pass"); rtm_pattern(matmul_type, with_reshape_xshape, with_transpose_xshape); @@ -68,7 +68,7 @@ void ReshapeTransposeMatmulMkldnnFusePass::Fuse( Graph *g) { if (!IsCompat(subgraph, g)) { LOG(WARNING) << "Op compatible check in reshape_transpose_" << matmul_type - << "_mkldnn_fuse_pass failed."; + << "_onednn_fuse_pass failed."; return; } @@ -268,10 +268,10 @@ ReshapeTransposeMatmulMkldnnFusePass::ReshapeTransposeMatmulMkldnnFusePass() { } // namespace framework } // namespace paddle 
-REGISTER_PASS(reshape_transpose_matmul_mkldnn_fuse_pass, +REGISTER_PASS(reshape_transpose_matmul_onednn_fuse_pass, paddle::framework::ir::ReshapeTransposeMatmulMkldnnFusePass); -REGISTER_PASS_CAPABILITY(reshape_transpose_matmul_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(reshape_transpose_matmul_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .EQ("reshape2", 0) diff --git a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/reshape_transpose_matmul_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/reshape_transpose_matmul_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/scale_matmul_fuse_pass.cc similarity index 97% rename from paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/scale_matmul_fuse_pass.cc index 9f50aefc46ce5..7ae647c6d28f7 100644 --- a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/scale_matmul_fuse_pass.cc @@ -12,14 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/scale_matmul_fuse_pass.h" #include #include #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #include "paddle/utils/string/pretty_log.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h b/paddle/fluid/framework/ir/onednn/scale_matmul_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/scale_matmul_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/self_attention_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/self_attention_fuse_pass.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/self_attention_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/self_attention_fuse_pass.cc index e02b167a19e3b..4e409f764491c 100644 --- a/paddle/fluid/framework/ir/mkldnn/self_attention_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/self_attention_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/self_attention_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/self_attention_fuse_pass.h" #include #include "paddle/fluid/framework/lod_tensor.h" diff --git a/paddle/fluid/framework/ir/mkldnn/self_attention_fuse_pass.h b/paddle/fluid/framework/ir/onednn/self_attention_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/self_attention_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/self_attention_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.cc b/paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass.cc similarity index 97% rename from paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.cc rename to paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass.cc index 764712a2fcd8a..7bce1813fed8a 100644 --- a/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.cc +++ b/paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.h" +#include "paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass.h" #include @@ -235,9 +235,9 @@ void ShuffleChannelMKLDNNDetectPass::ApplyImpl(ir::Graph* graph) const { } // namespace framework } // namespace paddle -REGISTER_PASS(shuffle_channel_mkldnn_detect_pass, +REGISTER_PASS(shuffle_channel_onednn_detect_pass, paddle::framework::ir::ShuffleChannelMKLDNNDetectPass); -REGISTER_PASS_CAPABILITY(shuffle_channel_mkldnn_detect_pass) +REGISTER_PASS_CAPABILITY(shuffle_channel_onednn_detect_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .EQ("reshape2", 0) diff --git a/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.h b/paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.h rename to paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc b/paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass_tester.cc similarity index 94% rename from paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass_tester.cc index 4c6fc3774e840..da389d3a1353c 100644 --- a/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass_tester.cc @@ -16,7 +16,7 @@ #include -#include "paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.h" +#include "paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" namespace paddle { @@ -55,7 +55,7 @@ void MainTest() { int original_nodes_num = graph->Nodes().size(); auto pass = - 
PassRegistry::Instance().Get("shuffle_channel_mkldnn_detect_pass"); + PassRegistry::Instance().Get("shuffle_channel_onednn_detect_pass"); graph.reset(pass->Apply(graph.release())); int current_nodes_num = graph->Nodes().size(); @@ -82,4 +82,4 @@ TEST(ShuffleChannelOneDNNDetectPass, ShuffleChannelOneDNNDetectPassTest) { } // namespace framework } // namespace paddle -USE_PASS(shuffle_channel_mkldnn_detect_pass); +USE_PASS(shuffle_channel_onednn_detect_pass); diff --git a/paddle/fluid/framework/ir/mkldnn/softplus_activation_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/softplus_activation_onednn_fuse_pass.cc similarity index 94% rename from paddle/fluid/framework/ir/mkldnn/softplus_activation_onednn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/softplus_activation_onednn_fuse_pass.cc index 2030a7dadc02e..d18765ff27bdd 100644 --- a/paddle/fluid/framework/ir/mkldnn/softplus_activation_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/softplus_activation_onednn_fuse_pass.cc @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/softplus_activation_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/softplus_activation_onednn_fuse_pass.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" -#include "paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/activation_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/phi/core/enforce.h" #include "paddle/utils/string/pretty_log.h" diff --git a/paddle/fluid/framework/ir/mkldnn/softplus_activation_onednn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/softplus_activation_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/softplus_activation_onednn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/softplus_activation_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/squeeze2_transpose2_onednn_fuse_pass.cc similarity index 96% rename from paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/squeeze2_transpose2_onednn_fuse_pass.cc index 1aafcc0614afb..4af9c6a770436 100644 --- a/paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/squeeze2_transpose2_onednn_fuse_pass.cc @@ -11,8 +11,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/squeeze2_transpose2_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/utils/string/pretty_log.h" diff --git a/paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/squeeze2_transpose2_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/squeeze2_transpose2_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/pass.cc b/paddle/fluid/framework/ir/pass.cc index 779d9986ef8a1..0b3ebd324dc7a 100644 --- a/paddle/fluid/framework/ir/pass.cc +++ b/paddle/fluid/framework/ir/pass.cc @@ -29,7 +29,7 @@ class Graph; } // namespace framework } // namespace paddle #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif namespace paddle { @@ -68,6 +68,7 @@ static const std::vector xpu_support_subgraph_passes = { "constant_folding_pass", "delete_elementwise_mul_op_pass", "generate_sequence_xpu_fuse_pass", + "group_norm_silu_xpu_fuse_pass", "embedding_with_eltwise_add_xpu_fuse_pass", "multi_encoder_xpu_fuse_pass", "multi_encoder_xpu_adaptive_seqlen_fuse_pass", @@ -82,6 +83,7 @@ static const std::vector xpu_support_subgraph_passes = { "fc_xpu_fuse_pass", "link_xpu_op_max_pass", "xpu_delete_cast_op_pass", + "spatial_transformer_resblock_xpu_fuse_pass", }; static std::vector support_subgraph_generate_passes; diff --git a/paddle/fluid/framework/ir/transfer_layout_pass.cc b/paddle/fluid/framework/ir/transfer_layout_pass.cc index c31737958dffb..b989f51dfe8f9 100644 --- 
a/paddle/fluid/framework/ir/transfer_layout_pass.cc +++ b/paddle/fluid/framework/ir/transfer_layout_pass.cc @@ -107,13 +107,17 @@ void TransferLayoutPass::ApplyImpl(ir::Graph *graph) const { FusePassBase::Init("fused_conv2d_add_act_layout_transfer", graph); auto *scope = param_scope(); - // only float16 compute precision need insert transfer_layout. + // float16 for all(cutlass cudnn), float32 for cutlass. + // why? + // In the case of cudnn nhwc fp32, performance degradation will occur bool is_fp16_precision = static_cast(Get("model_precision")) == phi::DataType::FLOAT16 || Get("enable_gpu_mixed"); - if (!is_fp16_precision) return; + bool cutlass_enable = Get("use_cutlass"); + + if (!is_fp16_precision && !cutlass_enable) return; PADDLE_ENFORCE_EQ(graph->IsMainGraph(), true, diff --git a/paddle/fluid/framework/ir/xpu/cross_attention_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/cross_attention_xpu_fuse_pass.cc new file mode 100644 index 0000000000000..cea83dae5e8bf --- /dev/null +++ b/paddle/fluid/framework/ir/xpu/cross_attention_xpu_fuse_pass.cc @@ -0,0 +1,666 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/framework/ir/xpu/cross_attention_xpu_fuse_pass.h" + +#include "glog/logging.h" + +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/framework/ir/xpu/pass_utils.h" +#include "paddle/fluid/framework/op_version_registry.h" +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace framework { +namespace ir { + +namespace patterns { + +struct CrossAttentionFusePattern : public PatternBase { + CrossAttentionFusePattern(PDPattern* pattern, + const std::string& name_scope, + bool with_q_scale); + + // declare operator node's name + PATTERN_DECL_NODE(q_mul); + PATTERN_DECL_NODE(k_mul); + PATTERN_DECL_NODE(v_mul); + PATTERN_DECL_NODE(q_add); + PATTERN_DECL_NODE(k_add); + PATTERN_DECL_NODE(v_add); + PATTERN_DECL_NODE(reshape_1); + PATTERN_DECL_NODE(reshape_2); + PATTERN_DECL_NODE(reshape_3); + PATTERN_DECL_NODE(transpose_1); + PATTERN_DECL_NODE(transpose_2); + PATTERN_DECL_NODE(transpose_3); + PATTERN_DECL_NODE(scale); + PATTERN_DECL_NODE(qk_matmul); + PATTERN_DECL_NODE(qk_add); + PATTERN_DECL_NODE(qk_softmax); + PATTERN_DECL_NODE(qkv_matmul); + PATTERN_DECL_NODE(transpose_4); + PATTERN_DECL_NODE(reshape_4); + + // declare variable node's name + PATTERN_DECL_NODE(input_q); + PATTERN_DECL_NODE(input_kv); + PATTERN_DECL_NODE(mask); + PATTERN_DECL_NODE(q_mul_w); + PATTERN_DECL_NODE(k_mul_w); + PATTERN_DECL_NODE(v_mul_w); + PATTERN_DECL_NODE(q_mul_out); + PATTERN_DECL_NODE(k_mul_out); + PATTERN_DECL_NODE(v_mul_out); + PATTERN_DECL_NODE(q_add_bias); + PATTERN_DECL_NODE(k_add_bias); + PATTERN_DECL_NODE(v_add_bias); + PATTERN_DECL_NODE(q_add_out); + PATTERN_DECL_NODE(k_add_out); + PATTERN_DECL_NODE(v_add_out); + PATTERN_DECL_NODE(reshape_1_out); + PATTERN_DECL_NODE(reshape_2_out); + PATTERN_DECL_NODE(reshape_3_out); + PATTERN_DECL_NODE(transpose_1_out); + PATTERN_DECL_NODE(transpose_2_out); + PATTERN_DECL_NODE(transpose_3_out); + PATTERN_DECL_NODE(scale_out); + PATTERN_DECL_NODE(qk_matmul_out); + 
PATTERN_DECL_NODE(qk_add_out); + PATTERN_DECL_NODE(qk_softmax_out); + PATTERN_DECL_NODE(qkv_matmul_out); + PATTERN_DECL_NODE(transpose_4_out); + PATTERN_DECL_NODE(output); + + private: + bool with_q_scale_{false}; +}; + +CrossAttentionFusePattern::CrossAttentionFusePattern( + PDPattern* pattern, const std::string& name_scope, bool with_q_scale) + : PatternBase(pattern, name_scope, name_scope), + with_q_scale_(with_q_scale) { + auto* input_q = pattern->NewNode(input_q_repr()) + ->assert_is_op_input("matmul_v2", "X") + ->AsInput(); + auto* input_kv = pattern->NewNode(input_kv_repr()) + ->assert_is_op_input("matmul_v2", "X") + ->AsInput(); + auto* mask = pattern->NewNode(mask_repr()) + ->assert_is_op_input("elementwise_add", "Y") + ->AsInput(); + auto* q_mul_w = + pattern->NewNode(q_mul_w_repr())->assert_is_op_input("matmul_v2", "Y"); + auto* q_mul = pattern->NewNode(q_mul_repr())->assert_is_op("matmul_v2"); + auto* q_mul_out = pattern->NewNode(q_mul_out_repr()) + ->assert_is_op_output("matmul_v2", "Out") + ->assert_is_op_input("elementwise_add", "X"); + auto* k_mul_w = + pattern->NewNode(k_mul_w_repr())->assert_is_op_input("matmul_v2", "Y"); + auto* k_mul = pattern->NewNode(k_mul_repr())->assert_is_op("matmul_v2"); + auto* k_mul_out = pattern->NewNode(k_mul_out_repr()) + ->assert_is_op_output("matmul_v2", "Out") + ->assert_is_op_input("elementwise_add", "X"); + auto* v_mul_w = + pattern->NewNode(v_mul_w_repr())->assert_is_op_input("matmul_v2", "Y"); + auto* v_mul = pattern->NewNode(v_mul_repr())->assert_is_op("matmul_v2"); + auto* v_mul_out = pattern->NewNode(v_mul_out_repr()) + ->assert_is_op_output("matmul_v2", "Out") + ->assert_is_op_input("elementwise_add", "X"); + auto* q_add = pattern->NewNode(q_add_repr())->assert_is_op("elementwise_add"); + auto* q_add_bias = pattern->NewNode(q_add_bias_repr()) + ->assert_is_op_input("elementwise_add", "Y"); + auto* q_add_out = pattern->NewNode(q_add_out_repr()) + ->assert_is_op_output("elementwise_add", "Out") + 
->assert_is_op_input("reshape2", "X"); + auto* k_add = pattern->NewNode(k_add_repr())->assert_is_op("elementwise_add"); + auto* k_add_bias = pattern->NewNode(k_add_bias_repr()) + ->assert_is_op_input("elementwise_add", "Y"); + auto* k_add_out = pattern->NewNode(k_add_out_repr()) + ->assert_is_op_output("elementwise_add", "Out") + ->assert_is_op_input("reshape2", "X"); + auto* v_add = pattern->NewNode(v_add_repr())->assert_is_op("elementwise_add"); + auto* v_add_bias = pattern->NewNode(v_add_bias_repr()) + ->assert_is_op_input("elementwise_add", "Y"); + auto* v_add_out = pattern->NewNode(v_add_out_repr()) + ->assert_is_op_output("elementwise_add", "Out") + ->assert_is_op_input("reshape2", "X"); + auto* reshape_1 = + pattern->NewNode(reshape_1_repr())->assert_is_op("reshape2"); + auto* reshape_1_out = pattern->NewNode(reshape_1_out_repr()) + ->assert_is_op_output("reshape2", "Out") + ->assert_is_op_input("transpose2", "X"); + auto* reshape_2 = + pattern->NewNode(reshape_2_repr())->assert_is_op("reshape2"); + auto* reshape_2_out = pattern->NewNode(reshape_2_out_repr()) + ->assert_is_op_output("reshape2", "Out") + ->assert_is_op_input("transpose2", "X"); + auto* reshape_3 = + pattern->NewNode(reshape_3_repr())->assert_is_op("reshape2"); + auto* reshape_3_out = pattern->NewNode(reshape_3_out_repr()) + ->assert_is_op_output("reshape2", "Out") + ->assert_is_op_input("transpose2", "X"); + auto* transpose_1 = + pattern->NewNode(transpose_1_repr()) + ->assert_is_op("transpose2") + ->assert_more([](Node* node) { + auto* op_desc = node->Op(); + auto axis = op_desc->GetAttrIfExists>("axis"); + size_t axis_rank = axis.size(); + return axis_rank == 4 && axis[0] == 0 && axis[1] == 2 && + axis[2] == 1 && axis[3] == 3; + }); + + auto* transpose_2 = + pattern->NewNode(transpose_2_repr()) + ->assert_is_op("transpose2") + ->assert_more([](Node* node) { + auto* op_desc = node->Op(); + auto axis = op_desc->GetAttrIfExists>("axis"); + size_t axis_rank = axis.size(); + return axis_rank == 
4 && axis[0] == 0 && axis[1] == 2 && + axis[2] == 1 && axis[3] == 3; + }); + auto* transpose_2_out = pattern->NewNode(transpose_2_out_repr()) + ->assert_is_op_output("transpose2", "Out") + ->assert_is_op_input("matmul_v2", "Y"); + auto* transpose_3 = + pattern->NewNode(transpose_3_repr()) + ->assert_is_op("transpose2") + ->assert_more([](Node* node) { + auto* op_desc = node->Op(); + auto axis = op_desc->GetAttrIfExists>("axis"); + size_t axis_rank = axis.size(); + return axis_rank == 4 && axis[0] == 0 && axis[1] == 2 && + axis[2] == 1 && axis[3] == 3; + }); + auto* transpose_3_out = pattern->NewNode(transpose_3_out_repr()) + ->assert_is_op_output("transpose2", "Out") + ->assert_is_op_input("matmul_v2", "Y"); + PDNode* transpose_1_out = nullptr; + PDNode* scale = nullptr; + PDNode* scale_out = nullptr; + if (with_q_scale_) { + transpose_1_out = pattern->NewNode(transpose_1_out_repr()) + ->assert_is_op_output("transpose2", "Out") + ->assert_is_op_input("scale", "X"); + scale = pattern->NewNode(scale_repr())->assert_is_op("scale"); + scale_out = pattern->NewNode(scale_out_repr()) + ->assert_is_op_output("scale", "Out") + ->assert_is_op_input("matmul_v2", "X"); + } else { + transpose_1_out = pattern->NewNode(transpose_1_out_repr()) + ->assert_is_op_output("transpose2", "Out") + ->assert_is_op_input("matmul_v2", "X"); + } + auto* qk_matmul = + pattern->NewNode(qk_matmul_repr())->assert_is_op("matmul_v2"); + auto* qk_matmul_out = pattern->NewNode(qk_matmul_out_repr()) + ->assert_is_op_output("matmul_v2", "Out") + ->assert_is_op_input("elementwise_add", "X"); + auto* qk_add = + pattern->NewNode(qk_add_repr())->assert_is_op("elementwise_add"); + auto* qk_add_out = pattern->NewNode(qk_add_out_repr()) + ->assert_is_op_output("elementwise_add", "Out") + ->assert_is_op_input("softmax", "X"); + auto* qk_softmax = + pattern->NewNode(qk_softmax_repr())->assert_is_op("softmax"); + auto* qk_softmax_out = pattern->NewNode(qk_softmax_out_repr()) + ->assert_is_op_output("softmax", 
"Out") + ->assert_is_op_input("matmul_v2", "X"); + auto* qkv_matmul = + pattern->NewNode(qkv_matmul_repr())->assert_is_op("matmul_v2"); + auto* qkv_matmul_out = pattern->NewNode(qkv_matmul_out_repr()) + ->assert_is_op_output("matmul_v2", "Out") + ->assert_is_op_input("transpose2", "X"); + auto* transpose_4 = + pattern->NewNode(transpose_4_repr())->assert_is_op("transpose2"); + auto* transpose_4_out = pattern->NewNode(transpose_4_out_repr()) + ->assert_is_op_output("transpose2", "Out") + ->assert_is_op_input("reshape2", "X"); + auto* reshape_4 = + pattern->NewNode(reshape_4_repr())->assert_is_op("reshape2"); + auto* output = pattern->NewNode(output_repr()) + ->AsOutput() + ->assert_is_op_output("reshape2", "Out"); + + // link nodes + q_mul->LinksFrom({input_q, q_mul_w}).LinksTo({q_mul_out}); + q_add->LinksFrom({q_mul_out, q_add_bias}).LinksTo({q_add_out}); + reshape_1->LinksFrom({q_add_out}).LinksTo({reshape_1_out}); + transpose_1->LinksFrom({reshape_1_out}).LinksTo({transpose_1_out}); + k_mul->LinksFrom({input_kv, k_mul_w}).LinksTo({k_mul_out}); + k_add->LinksFrom({k_mul_out, k_add_bias}).LinksTo({k_add_out}); + reshape_2->LinksFrom({k_add_out}).LinksTo({reshape_2_out}); + transpose_2->LinksFrom({reshape_2_out}).LinksTo({transpose_2_out}); + if (with_q_scale_) { + scale->LinksFrom({transpose_1_out}).LinksTo({scale_out}); + qk_matmul->LinksFrom({scale_out, transpose_2_out}).LinksTo({qk_matmul_out}); + } else { + qk_matmul->LinksFrom({transpose_1_out, transpose_2_out}) + .LinksTo({qk_matmul_out}); + } + qk_add->LinksFrom({qk_matmul_out, mask}).LinksTo({qk_add_out}); + qk_softmax->LinksFrom({qk_add_out}).LinksTo({qk_softmax_out}); + v_mul->LinksFrom({input_kv, v_mul_w}).LinksTo({v_mul_out}); + v_add->LinksFrom({v_mul_out, v_add_bias}).LinksTo({v_add_out}); + reshape_3->LinksFrom({v_add_out}).LinksTo({reshape_3_out}); + transpose_3->LinksFrom({reshape_3_out}).LinksTo({transpose_3_out}); + qkv_matmul->LinksFrom({qk_softmax_out, transpose_3_out}) + 
.LinksTo({qkv_matmul_out}); + transpose_4->LinksFrom({qkv_matmul_out}).LinksTo({transpose_4_out}); + reshape_4->LinksFrom({transpose_4_out}).LinksTo({output}); +} + +} // namespace patterns + +void CrossAttentionXPUFusePass::PrepareQKVWeight(Graph* graph, + Scope* scope, + BlockDesc* block, + Node* w, + Node** real_w, + Node** w_max) const { + phi::DenseTensor w_tensor; + phi::DenseTensor w_int16_tensor; + phi::DenseTensor w_max_tensor; + + Assign(scope->Var(w->Name())->Get(), &w_tensor); + CastToFp32(&w_tensor, &w_int16_tensor); + ConvertWithQuant( + &w_int16_tensor, &w_max_tensor, nullptr, false); + + size_t real_w_hash = HashTensor(w_int16_tensor); + size_t w_max_hash = HashTensor(w_max_tensor); + std::string real_w_name = std::to_string(real_w_hash); + std::string w_max_name = std::to_string(w_max_hash); + + *real_w = FindNodeWithName(graph, real_w_name); + + if (*real_w == nullptr) { + // Create real_w node + // Update real_w var_desc in block + VarDesc real_w_desc(real_w_name); + real_w_desc.SetPersistable(true); + real_w_desc.SetShape(common::vectorize(w_int16_tensor.dims())); + real_w_desc.SetDataType( + framework::TransToProtoVarType(w_int16_tensor.dtype())); + *real_w = graph->CreateVarNode(&real_w_desc); + auto* block_real_w_desc = block->Var(real_w_name); + block_real_w_desc->SetPersistable(real_w_desc.Persistable()); + block_real_w_desc->SetShape(real_w_desc.GetShape()); + block_real_w_desc->SetDataType(real_w_desc.GetDataType()); + // Create w_max node + // Update w_max var_desc in block + VarDesc w_max_desc(w_max_name); + w_max_desc.SetPersistable(true); + w_max_desc.SetShape(common::vectorize(w_max_tensor.dims())); + w_max_desc.SetDataType(proto::VarType::Type::VarType_Type_FP32); + *w_max = graph->CreateVarNode(&w_max_desc); + auto* block_w_max_desc = block->Var(w_max_name); + block_w_max_desc->SetPersistable(w_max_desc.Persistable()); + block_w_max_desc->SetShape(w_max_desc.GetShape()); + block_w_max_desc->SetDataType(w_max_desc.GetDataType()); + 
+ // Find real_w/w_max variable in scope + auto* w_var = scope->FindVar(real_w_name); + if (w_var == nullptr) { + // Create qkv_w_intx/qkv_w_max variable/tensor + Assign(w_int16_tensor, + scope->Var(real_w_name)->GetMutable()); + Assign(w_max_tensor, + scope->Var(w_max_name)->GetMutable()); + } else { + // Share the same variable + PADDLE_ENFORCE_NOT_NULL( + scope->FindVar(w_max_name), + platform::errors::Fatal( + "w_max(%s) variable should not be nullptr if real_w(%s) " + "variable is exist.", + w_max_name, + real_w_name)); + } + } else { + *w_max = FindNodeWithName(graph, w_max_name); + PADDLE_ENFORCE_NOT_NULL( + *w_max, + platform::errors::Fatal( + "w_max(%s) variable should not be nullptr if real_w(%s) " + "variable is exist.", + w_max_name, + real_w_name)); + } +} + +void CrossAttentionXPUFusePass::PrepareQKVBias(Graph* graph, + Scope* scope, + BlockDesc* block, + Node* q_bias, + Node* k_bias, + Node* v_bias, + Node** real_q_bias, + Node** real_k_bias, + Node** real_v_bias) const { + phi::DenseTensor* q_bias_tensor; + phi::DenseTensor* k_bias_tensor; + phi::DenseTensor* v_bias_tensor; + phi::DenseTensor q_bias_fp32_tensor; + phi::DenseTensor k_bias_fp32_tensor; + phi::DenseTensor v_bias_fp32_tensor; + q_bias_tensor = scope->Var(q_bias->Name())->GetMutable(); + k_bias_tensor = scope->Var(k_bias->Name())->GetMutable(); + v_bias_tensor = scope->Var(v_bias->Name())->GetMutable(); + CastToFp32(q_bias_tensor, &q_bias_fp32_tensor); + CastToFp32(k_bias_tensor, &k_bias_fp32_tensor); + CastToFp32(v_bias_tensor, &v_bias_fp32_tensor); + + size_t q_bias_hash = HashTensor(q_bias_fp32_tensor); + std::string q_bias_name = std::to_string(q_bias_hash); + *real_q_bias = FindNodeWithName(graph, q_bias_name); + + size_t k_bias_hash = HashTensor(k_bias_fp32_tensor); + std::string k_bias_name = std::to_string(k_bias_hash); + *real_k_bias = FindNodeWithName(graph, k_bias_name); + + size_t v_bias_hash = HashTensor(v_bias_fp32_tensor); + std::string v_bias_name = 
std::to_string(v_bias_hash); + *real_v_bias = FindNodeWithName(graph, v_bias_name); + if (*real_q_bias == nullptr) { + // Create q_bias node + // Update q_bias var_desc in block + VarDesc q_bias_desc(q_bias_name); + q_bias_desc.SetPersistable(true); + q_bias_desc.SetShape(common::vectorize(q_bias_fp32_tensor.dims())); + q_bias_desc.SetDataType( + framework::TransToProtoVarType(q_bias_fp32_tensor.dtype())); + *real_q_bias = graph->CreateVarNode(&q_bias_desc); + auto* block_q_bias_desc = block->Var(q_bias_name); + block_q_bias_desc->SetPersistable(q_bias_desc.Persistable()); + block_q_bias_desc->SetShape(q_bias_desc.GetShape()); + block_q_bias_desc->SetDataType(q_bias_desc.GetDataType()); + Assign(q_bias_fp32_tensor, + scope->Var(q_bias_name)->GetMutable()); + } + if (*real_k_bias == nullptr) { + // Create k_bias node + // Update k_bias var_desc in block + VarDesc k_bias_desc(k_bias_name); + k_bias_desc.SetPersistable(true); + k_bias_desc.SetShape(common::vectorize(k_bias_fp32_tensor.dims())); + k_bias_desc.SetDataType( + framework::TransToProtoVarType(k_bias_fp32_tensor.dtype())); + *real_k_bias = graph->CreateVarNode(&k_bias_desc); + auto* block_k_bias_desc = block->Var(k_bias_name); + block_k_bias_desc->SetPersistable(k_bias_desc.Persistable()); + block_k_bias_desc->SetShape(k_bias_desc.GetShape()); + block_k_bias_desc->SetDataType(k_bias_desc.GetDataType()); + Assign(k_bias_fp32_tensor, + scope->Var(k_bias_name)->GetMutable()); + } + if (*real_v_bias == nullptr) { + // Create v_bias node + // Update v_bias var_desc in block + VarDesc v_bias_desc(v_bias_name); + v_bias_desc.SetPersistable(true); + v_bias_desc.SetShape(common::vectorize(v_bias_fp32_tensor.dims())); + v_bias_desc.SetDataType( + framework::TransToProtoVarType(v_bias_fp32_tensor.dtype())); + *real_v_bias = graph->CreateVarNode(&v_bias_desc); + auto* block_v_bias_desc = block->Var(v_bias_name); + block_v_bias_desc->SetPersistable(v_bias_desc.Persistable()); + 
block_v_bias_desc->SetShape(v_bias_desc.GetShape()); + block_v_bias_desc->SetDataType(v_bias_desc.GetDataType()); + Assign(v_bias_fp32_tensor, + scope->Var(v_bias_name)->GetMutable()); + } +} + +void CrossAttentionXPUFusePass::ApplyCrossAttentionXPUFuse( + ir::Graph* graph, bool with_q_scale) const { + GraphPatternDetector gpd; + patterns::CrossAttentionFusePattern pattern( + gpd.mutable_pattern(), name_scope_, with_q_scale); + int found_subgraph_count = 0; + + auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, + Graph* graph) { + VLOG(4) << "handle CrossAttentionXPUFusePass"; + + // declare operator node's name + GET_IR_NODE(q_mul); + GET_IR_NODE(k_mul); + GET_IR_NODE(v_mul); + GET_IR_NODE(q_add); + GET_IR_NODE(k_add); + GET_IR_NODE(v_add); + GET_IR_NODE(reshape_1); + GET_IR_NODE(reshape_2); + GET_IR_NODE(reshape_3); + GET_IR_NODE(transpose_1); + GET_IR_NODE(transpose_2); + GET_IR_NODE(transpose_3); + GET_IR_NODE(scale); + GET_IR_NODE(qk_matmul); + GET_IR_NODE(qk_add); + GET_IR_NODE(qk_softmax); + GET_IR_NODE(qkv_matmul); + GET_IR_NODE(transpose_4); + GET_IR_NODE(reshape_4); + + // declare variable node's name + GET_IR_NODE(input_q); + GET_IR_NODE(input_kv); + GET_IR_NODE(mask); + GET_IR_NODE(q_mul_w); + GET_IR_NODE(k_mul_w); + GET_IR_NODE(v_mul_w); + GET_IR_NODE(q_mul_out); + GET_IR_NODE(k_mul_out); + GET_IR_NODE(v_mul_out); + GET_IR_NODE(q_add_bias); + GET_IR_NODE(k_add_bias); + GET_IR_NODE(v_add_bias); + GET_IR_NODE(q_add_out); + GET_IR_NODE(k_add_out); + GET_IR_NODE(v_add_out); + GET_IR_NODE(reshape_1_out); + GET_IR_NODE(reshape_2_out); + GET_IR_NODE(reshape_3_out); + GET_IR_NODE(transpose_1_out); + GET_IR_NODE(transpose_2_out); + GET_IR_NODE(transpose_3_out); + GET_IR_NODE(scale_out); + GET_IR_NODE(qk_matmul_out); + GET_IR_NODE(qk_add_out); + GET_IR_NODE(qk_softmax_out); + GET_IR_NODE(qkv_matmul_out); + GET_IR_NODE(transpose_4_out); + GET_IR_NODE(output); + + // generate fuse op + auto* scope = param_scope(); + auto* block = 
q_mul->Op()->Block(); + framework::OpDesc fused_op_desc(block); + fused_op_desc.SetType("cross_attention_xpu"); + + Node* real_q_w = nullptr; + Node* q_w_max = nullptr; + Node* real_k_w = nullptr; + Node* k_w_max = nullptr; + Node* real_v_w = nullptr; + Node* v_w_max = nullptr; + PrepareQKVWeight(graph, scope, block, q_mul_w, &real_q_w, &q_w_max); + PrepareQKVWeight(graph, scope, block, k_mul_w, &real_k_w, &k_w_max); + PrepareQKVWeight(graph, scope, block, v_mul_w, &real_v_w, &v_w_max); + + std::vector fc_weight_nodes = {real_q_w, real_k_w, real_v_w}; + std::vector fc_weight_names; + for (auto* node : fc_weight_nodes) { + if (node) { + fc_weight_names.push_back(node->Name()); + } + } + std::vector fc_weight_max_nodes = {q_w_max, k_w_max, v_w_max}; + std::vector fc_weight_max_names; + for (auto* node : fc_weight_max_nodes) { + if (node) { + fc_weight_max_names.push_back(node->Name()); + } + } + + Node* q_add_bias_fp32 = nullptr; + Node* k_add_bias_fp32 = nullptr; + Node* v_add_bias_fp32 = nullptr; + PrepareQKVBias(graph, + scope, + block, + q_add_bias, + k_add_bias, + v_add_bias, + &q_add_bias_fp32, + &k_add_bias_fp32, + &v_add_bias_fp32); + std::vector fc_bias_nodes = { + q_add_bias_fp32, k_add_bias_fp32, v_add_bias_fp32}; + std::vector fc_bias_names; + for (auto* node : fc_bias_nodes) { + if (node) { + fc_bias_names.push_back(node->Name()); + } + } + + // set input of fuse_op + fused_op_desc.SetInput("input_q", {input_q->Name()}); + fused_op_desc.SetInput("input_kv", {input_kv->Name()}); + fused_op_desc.SetInput("fc_weight", fc_weight_names); + fused_op_desc.SetInput("fc_weight_max", fc_weight_max_names); + fused_op_desc.SetInput("fc_bias", fc_bias_names); + fused_op_desc.SetInput("mask", {mask->Name()}); + + // set attributes of fuse_op + if (with_q_scale) { + float scale_val = PADDLE_GET_CONST(float, scale->Op()->GetAttr("scale")); + fused_op_desc.SetAttr("alpha", scale_val); + VLOG(4) << "while with_q_scale, scale_val = " << scale_val; + } else { + // in xdnn, 
0.0f is default value of NewBaseAttnParam.alpha + fused_op_desc.SetAttr("alpha", 0.0f); + } + fused_op_desc.SetAttr( + "head_num", static_cast(transpose_1_out->Var()->GetShape()[1])); + fused_op_desc.SetAttr( + "head_dim", static_cast(transpose_1_out->Var()->GetShape()[3])); + // TODO(tianrui): support more out_dtype + fused_op_desc.SetAttr("out_dtype", input_q->Var()->GetDataType()); + + // set output of fuse_op + VarDesc fused_op_out_max_desc("qkv_max"); + Node* fused_op_out_max = graph->CreateVarNode(&fused_op_out_max_desc); + fused_op_desc.SetOutput("qkv_max", {"qkv_max"}); + fused_op_desc.SetOutput("qkv", {output->Name()}); + + auto* fused_op = graph->CreateOpNode(&fused_op_desc); + + // link input of fuse_op + IR_NODE_LINK_TO(input_q, fused_op); + IR_NODE_LINK_TO(input_kv, fused_op); + for (auto* node : fc_weight_nodes) { + if (node) { + IR_NODE_LINK_TO(node, fused_op); + } + } + for (auto* node : fc_weight_max_nodes) { + if (node) { + IR_NODE_LINK_TO(node, fused_op); + } + } + for (auto* node : fc_bias_nodes) { + if (node) { + IR_NODE_LINK_TO(node, fused_op); + } + } + // link output of fuse_op + IR_NODE_LINK_TO(fused_op, output); + IR_NODE_LINK_TO(fused_op, fused_op_out_max); + + // delete useless node + std::unordered_set del_node_set; + del_node_set.insert(q_mul); + del_node_set.insert(q_mul_out); + del_node_set.insert(k_mul); + del_node_set.insert(k_mul_out); + del_node_set.insert(v_mul); + del_node_set.insert(v_mul_out); + del_node_set.insert(q_add); + del_node_set.insert(q_add_out); + del_node_set.insert(k_add); + del_node_set.insert(k_add_out); + del_node_set.insert(v_add); + del_node_set.insert(v_add_out); + del_node_set.insert(reshape_1); + del_node_set.insert(reshape_1_out); + del_node_set.insert(reshape_2); + del_node_set.insert(reshape_2_out); + del_node_set.insert(reshape_3); + del_node_set.insert(reshape_3_out); + del_node_set.insert(transpose_1); + del_node_set.insert(transpose_1_out); + del_node_set.insert(transpose_2); + 
del_node_set.insert(transpose_2_out); + del_node_set.insert(transpose_3); + del_node_set.insert(transpose_3_out); + del_node_set.insert(qk_matmul); + del_node_set.insert(qk_matmul_out); + del_node_set.insert(qk_add); + del_node_set.insert(qk_add_out); + del_node_set.insert(qk_softmax); + del_node_set.insert(qk_softmax_out); + del_node_set.insert(qkv_matmul); + del_node_set.insert(qkv_matmul_out); + del_node_set.insert(transpose_4); + del_node_set.insert(transpose_4_out); + del_node_set.insert(reshape_4); + if (with_q_scale) { + del_node_set.insert(scale); + del_node_set.insert(scale_out); + } + GraphSafeRemoveNodes(graph, del_node_set); + + found_subgraph_count++; + }; + + gpd(graph, handler); + AddStatis(found_subgraph_count); +} + +void CrossAttentionXPUFusePass::ApplyImpl(ir::Graph* graph) const { + PADDLE_ENFORCE_NOT_NULL( + graph, platform::errors::PreconditionNotMet("graph should not be null.")); + Init(name_scope_, graph); + + for (auto with_q_scale : {true, false}) { + ApplyCrossAttentionXPUFuse(graph, with_q_scale); + } +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(cross_attention_xpu_fuse_pass, + paddle::framework::ir::CrossAttentionXPUFusePass); + +REGISTER_PASS_CAPABILITY(cross_attention_xpu_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination().EQ( + "cross_attention_xpu", 0)); diff --git a/paddle/fluid/framework/ir/xpu/cross_attention_xpu_fuse_pass.h b/paddle/fluid/framework/ir/xpu/cross_attention_xpu_fuse_pass.h new file mode 100644 index 0000000000000..9a04275294ea8 --- /dev/null +++ b/paddle/fluid/framework/ir/xpu/cross_attention_xpu_fuse_pass.h @@ -0,0 +1,126 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include "paddle/fluid/framework/ir/fuse_pass_base.h" +#include "paddle/fluid/framework/ir/pass.h" + +namespace phi { +class DenseTensor; +} // namespace phi + +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +} // namespace paddle + +namespace paddle { +namespace framework { +namespace ir { + +/* +This pass is used to fuse the cross attention op into one op in decoder. +models . + +Origin subgraph: + + mask input_q input_kv + | | | | + | | |-----------| + | matmul matmul matmul + | |q |k |v + | | | | + | | | | + | add add add + | | | | + | | | | + | reshape reshape reshape + | | | | + | | | | + | transpose transpose transpose + | | | | + | | | | + | (scale) | | + | | | | + \ |(x) |(y) | + \ \ / | + \ qk_matmul | + \ | | + \ | | + add / + | / + | / + softmax / + \ / + \ / + qkv_matmul + | + | + transpose + | + | + reshape + | + | + output + +------------------------------------------------------- +Fused subgraph: + input_q input_kv + | | + | | + | | + cross_attention_xpu + | + | + | + output + +*/ + +class CrossAttentionXPUFusePass : public FusePassBase { + protected: + void ApplyImpl(ir::Graph* graph) const override; + + private: + void ApplyCrossAttentionXPUFuse(ir::Graph* graph, bool with_q_scale) const; + + // 1. Generate q/k/v_w_max tensor + // 2. 
Quant q/k/v_w to int16 + void PrepareQKVWeight(Graph* graph, + Scope* scope, + BlockDesc* block, + Node* w, + Node** real_w, + Node** w_max) const; + + // Cast fc_bias to fp32 + void PrepareQKVBias(Graph* graph, + Scope* scope, + BlockDesc* block, + Node* q_bias, + Node* k_bias, + Node* v_bias, + Node** real_q_bias, + Node** real_k_bias, + Node** real_v_bias) const; + + const std::string name_scope_{"cross_attention_xpu_fuse_pass"}; +}; + +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/xpu/decoder_attention_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/decoder_attention_xpu_fuse_pass.cc index ad8dd1a55a868..c86180e24088a 100644 --- a/paddle/fluid/framework/ir/xpu/decoder_attention_xpu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/xpu/decoder_attention_xpu_fuse_pass.cc @@ -17,6 +17,7 @@ #include "glog/logging.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/framework/ir/quantize_helper.h" #include "paddle/fluid/framework/ir/xpu/pass_utils.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" @@ -162,15 +163,15 @@ DecoderAttentionFusePattern::DecoderAttentionFusePattern( // link nodes reshape2_1->LinksFrom({input_q}).LinksTo({reshape2_1_out}); - reshape2_2->LinksFrom({input_k}).LinksTo({reshape2_2_out}); - reshape2_3->LinksFrom({input_v}).LinksTo({reshape2_3_out}); transpose2_1->LinksFrom({reshape2_1_out}).LinksTo({transpose2_1_out}); + reshape2_2->LinksFrom({input_k}).LinksTo({reshape2_2_out}); transpose2_2->LinksFrom({reshape2_2_out}).LinksTo({transpose2_2_out}); - transpose2_3->LinksFrom({reshape2_3_out}).LinksTo({transpose2_3_out}); qk_matmul->LinksFrom({transpose2_1_out, transpose2_2_out}) .LinksTo({qk_matmul_out}); scale->LinksFrom({qk_matmul_out}).LinksTo({scale_out}); qk_softmax->LinksFrom({scale_out}).LinksTo({qk_softmax_out}); + reshape2_3->LinksFrom({input_v}).LinksTo({reshape2_3_out}); + 
transpose2_3->LinksFrom({reshape2_3_out}).LinksTo({transpose2_3_out}); qkv_matmul->LinksFrom({qk_softmax_out, transpose2_3_out}) .LinksTo({qkv_matmul_out}); transpose2_4->LinksFrom({qkv_matmul_out}).LinksTo({transpose2_4_out}); @@ -222,6 +223,7 @@ void DecoderAttentionXPUFusePass::ApplyDecoderAttentionXPUFuse( GET_IR_NODE(output); // Generate fuse op + auto* scope = param_scope(); auto* block = reshape2_1->Op()->Block(); framework::OpDesc fused_op_desc(block); fused_op_desc.SetType("qkv_attention_xpu"); @@ -230,6 +232,54 @@ void DecoderAttentionXPUFusePass::ApplyDecoderAttentionXPUFuse( fused_op_desc.SetInput("q", {input_q->Name()}); fused_op_desc.SetInput("k", {input_k->Name()}); fused_op_desc.SetInput("v", {input_v->Name()}); + std::unordered_map> var_quant_scales = + GetQuantInfoFromTheGraph(graph, "has_quant_info", "var_quant_scales"); + // recored q/k/v max, qk_max, and qkv_max + std::vector input_max_nodes; + if (var_quant_scales.find(input_q->Name()) != var_quant_scales.end() && + var_quant_scales.find(input_k->Name()) != var_quant_scales.end() && + var_quant_scales.find(input_v->Name()) != var_quant_scales.end() && + var_quant_scales.find(qk_matmul_out->Name()) != + var_quant_scales.end() && + var_quant_scales.find(qkv_matmul_out->Name()) != + var_quant_scales.end()) { + std::vector input_max_vec; + input_max_vec.push_back(var_quant_scales.at(input_q->Name())[0]); + input_max_vec.push_back(var_quant_scales.at(input_k->Name())[0]); + input_max_vec.push_back(var_quant_scales.at(input_v->Name())[0]); + input_max_vec.push_back(var_quant_scales.at(qk_matmul_out->Name())[0]); + input_max_vec.push_back(var_quant_scales.at(qkv_matmul_out->Name())[0]); + std::vector quant_max_names = { + "q_max", "k_max", "v_max", "qk_max", "qkv_max"}; + for (size_t i = 0; i < input_max_vec.size(); i++) { + std::string input_max_name = + input_q->Name() + "_" + std::to_string(i) + "_max_in"; + int max_ptr_size = phi::backends::xpu::get_xpu_max_ptr_size(-1); + VarDesc 
input_max_desc(input_max_name); + input_max_desc.SetPersistable(true); + input_max_desc.SetShape({static_cast(max_ptr_size)}); + input_max_desc.SetDataType(proto::VarType::Type::VarType_Type_FP32); + Node* input_max_in = graph->CreateVarNode(&input_max_desc); + auto* block_input_max_in_desc = block->Var(input_max_name); + block_input_max_in_desc->SetPersistable(input_max_desc.Persistable()); + block_input_max_in_desc->SetShape(input_max_desc.GetShape()); + block_input_max_in_desc->SetDataType(input_max_desc.GetDataType()); + phi::DenseTensor input_max_in_cpu_tensor; + auto* cpu_ctx = static_cast( + platform::DeviceContextPool::Instance().Get(phi::CPUPlace())); + input_max_in_cpu_tensor.set_type(phi::DataType::FLOAT32); + input_max_in_cpu_tensor.Resize({max_ptr_size}); + std::vector input_max(max_ptr_size, input_max_vec[i]); + memcpy(cpu_ctx->Alloc(&input_max_in_cpu_tensor), + input_max.data(), + max_ptr_size * sizeof(float)); + Assign(input_max_in_cpu_tensor, + scope->Var(input_max_name)->GetMutable()); + fused_op_desc.SetInput(quant_max_names[i], {input_max_name}); + + input_max_nodes.push_back(input_max_in); + } + } // set attributes of fuse_op float scale_val = PADDLE_GET_CONST(float, scale->Op()->GetAttr("scale")); @@ -245,9 +295,6 @@ void DecoderAttentionXPUFusePass::ApplyDecoderAttentionXPUFuse( fused_op_desc.SetAttr("out_dtype", input_q->Var()->GetDataType()); // set output of fuse_op - VarDesc fused_op_out_max_desc("qkv_max"); - Node* fused_op_out_max = graph->CreateVarNode(&fused_op_out_max_desc); - fused_op_desc.SetOutput("qkv_max", {"qkv_max"}); fused_op_desc.SetOutput("qkv", {output->Name()}); auto* fused_op = graph->CreateOpNode(&fused_op_desc); @@ -256,7 +303,9 @@ void DecoderAttentionXPUFusePass::ApplyDecoderAttentionXPUFuse( IR_NODE_LINK_TO(input_k, fused_op); IR_NODE_LINK_TO(input_v, fused_op); IR_NODE_LINK_TO(fused_op, output); - IR_NODE_LINK_TO(fused_op, fused_op_out_max); + for (size_t i = 0; i < input_max_nodes.size(); i++) { + 
IR_NODE_LINK_TO(input_max_nodes[i], fused_op); + } // delete useless node std::unordered_set del_node_set; diff --git a/paddle/fluid/framework/ir/xpu/fc_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/fc_xpu_fuse_pass.cc index 04b645a4d33d8..2010d4cb48de0 100644 --- a/paddle/fluid/framework/ir/xpu/fc_xpu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/xpu/fc_xpu_fuse_pass.cc @@ -841,6 +841,35 @@ int FcXPUFusePass::ApplyImpl(ir::Graph* graph, } else if (filter_data_type == phi::DataType::FLOAT16) { op_weights_precision = "float16"; } + if (op_weights_precision == "float32" && + AreScalesPresentForNodes(&var_quant_scales, {mul_w})) { + // convert weight to int8 + auto* var = scope->FindVar(mul_w_name); + PADDLE_ENFORCE_NOT_NULL( + var, + platform::errors::NotFound( + "The input persistable [%s] var of [%s] op is not found.", + mul_w_name)); + auto* weight_tensor = var->GetMutable(); + float* fp32_weight_data = weight_tensor->data(); + std::vector weight_data; + weight_data.resize(weight_tensor->numel()); + for (int i = 0; i < weight_tensor->numel(); i++) { + weight_data[i] = static_cast(fp32_weight_data[i]); + } + const auto weight_dims = weight_tensor->dims(); + weight_tensor->clear(); // clear int weight + weight_tensor->set_type(phi::DataType::INT8); + weight_tensor->Resize(common::make_ddim(common::vectorize(weight_dims))); + auto* cpu_ctx = static_cast( + platform::DeviceContextPool::Instance().Get(phi::CPUPlace())); + auto* new_weight_data = cpu_ctx->Alloc(weight_tensor); + memcpy(new_weight_data, + weight_data.data(), + weight_tensor->numel() * sizeof(int8_t)); + op_weights_precision = "int8"; + } + VLOG(4) << "FC fusion fuse pass is running on " << op_weights_precision << " precision!"; auto* block = mul->Op()->Block(); diff --git a/paddle/fluid/framework/ir/xpu/group_norm_silu_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/group_norm_silu_xpu_fuse_pass.cc new file mode 100644 index 0000000000000..86fef3fd0c2ae --- /dev/null +++ 
b/paddle/fluid/framework/ir/xpu/group_norm_silu_xpu_fuse_pass.cc @@ -0,0 +1,208 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "glog/logging.h" + +#include "paddle/fluid/framework/ir/fuse_pass_base.h" +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/framework/ir/pass.h" +#include "paddle/fluid/framework/ir/xpu/pass_utils.h" +#include "paddle/fluid/framework/ir/xpu/quant_utils.h" +#include "paddle/fluid/framework/op_version_registry.h" +#include "paddle/fluid/platform/enforce.h" + +namespace phi { +class DenseTensor; +} // namespace phi + +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +} // namespace paddle + +namespace paddle { +namespace framework { +namespace ir { +namespace patterns { + +/* +fuse gn + activation block in to xpu_ele_fusion op +For example: +graph: + X + Scale | Bias + \ | / + group norm + / | \ + / | \ + variance | mean + | + silu + | + output +------------------------------------------------------ +After the pass is applied: + X + Scale | Bias + \ | / + gn_silu_fusion + | + Out +*/ +struct GroupNormalizeSiluXPUPattern : public PatternBase { + GroupNormalizeSiluXPUPattern(PDPattern* pattern, + const std::string& name_scope); + // declare operator node's name + PATTERN_DECL_NODE(gn); + PATTERN_DECL_NODE(silu); + // declare variable node's name + PATTERN_DECL_NODE(gn_x); + 
PATTERN_DECL_NODE(gn_bias); + PATTERN_DECL_NODE(gn_scale); + PATTERN_DECL_NODE(gn_y); + PATTERN_DECL_NODE(gn_mean); + PATTERN_DECL_NODE(gn_variance); + PATTERN_DECL_NODE(silu_out); +}; + +GroupNormalizeSiluXPUPattern::GroupNormalizeSiluXPUPattern( + PDPattern* pattern, const std::string& name_scope) + : PatternBase(pattern, name_scope, name_scope) { + auto gn = pattern->NewNode(gn_repr())->assert_is_op("group_norm"); + auto gn_x = pattern->NewNode(gn_x_repr()) + ->assert_is_op_input("group_norm", "X") + ->AsInput(); + auto gn_bias = pattern->NewNode(gn_bias_repr()) + ->assert_is_op_input("group_norm", "Bias") + ->assert_is_persistable_var() + ->AsInput(); + auto gn_scale = pattern->NewNode(gn_scale_repr()) + ->assert_is_op_input("group_norm", "Scale") + ->assert_is_persistable_var() + ->AsInput(); + auto gn_y = pattern->NewNode(gn_y_repr()) + ->assert_is_op_output("group_norm", "Y") + ->assert_is_op_input("silu", "X") + ->assert_has_n_outputs(1); + auto gn_mean = pattern->NewNode(gn_mean_repr()) + ->assert_is_op_output("group_norm", "Mean") + ->assert_has_n_outputs(0); + auto gn_variance = pattern->NewNode(gn_variance_repr()) + ->assert_is_op_output("group_norm", "Variance") + ->assert_has_n_outputs(0); + gn->LinksFrom({gn_x, gn_bias, gn_scale}) + .LinksTo({gn_y, gn_mean, gn_variance}); + + auto silu = pattern->NewNode(silu_repr())->assert_is_op("silu"); + auto silu_out = pattern->NewNode(silu_out_repr()) + ->AsOutput() + ->assert_is_op_output("silu", "Out"); + silu->LinksFrom({gn_y}).LinksTo({silu_out}); +} + +} // namespace patterns + +class GroupNormalizeSiluXPUFusePass : public FusePassBase { + protected: + void ApplyImpl(ir::Graph* graph) const override; + + private: + void FuseGroupNormalizeSilu(ir::Graph* graph) const; + + const std::string name_scope_{"group_norm_silu_xpu_fuse_pass"}; +}; + +void GroupNormalizeSiluXPUFusePass::ApplyImpl(ir::Graph* graph) const { + PADDLE_ENFORCE_NOT_NULL( + graph, platform::errors::PreconditionNotMet("graph should not be 
null.")); + Init(name_scope_, graph); + + FuseGroupNormalizeSilu(graph); +} + +void GroupNormalizeSiluXPUFusePass::FuseGroupNormalizeSilu( + ir::Graph* graph) const { + GraphPatternDetector gpd; + patterns::GroupNormalizeSiluXPUPattern pattern(gpd.mutable_pattern(), + name_scope_); + + int found_subgraph_count = 0; + auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, + Graph* graph) { + VLOG(4) << "handle GroupNormalizeSiluXPUFusePass fuse"; + // declare operator node's name + GET_IR_NODE(gn); + GET_IR_NODE(silu); + // declare variable node's name + GET_IR_NODE(gn_x); + GET_IR_NODE(gn_bias); + GET_IR_NODE(gn_scale); + GET_IR_NODE(gn_y); + GET_IR_NODE(gn_mean); + GET_IR_NODE(gn_variance); + GET_IR_NODE(silu_out); + + auto* block = gn->Op()->Block(); + auto* scope = param_scope(); + PADDLE_ENFORCE_NOT_NULL( + scope, platform::errors::InvalidArgument("Scope cannot be nullptr.")); + // delete useless node + std::unordered_set delete_nodes; + + float eps = PADDLE_GET_CONST(float, gn->Op()->GetAttr("epsilon")); + int groups = PADDLE_GET_CONST(int, gn->Op()->GetAttr("groups")); + + std::string fused_op_out_name; + fused_op_out_name = silu_out->Name(); + // Generate add_layernorm fused op + framework::OpDesc fused_op_desc(block); + + fused_op_desc.SetType("group_norm_silu_xpu"); + // set attrs for fused op + fused_op_desc.SetInput("x", {gn_x->Name()}); + fused_op_desc.SetInput("bias", {gn_bias->Name()}); + fused_op_desc.SetInput("scale", {gn_scale->Name()}); + fused_op_desc.SetAttr("epsilon", eps); + fused_op_desc.SetAttr("groups", groups); + fused_op_desc.SetOutput("out", {fused_op_out_name}); + // relink fused op + auto* fused_op = graph->CreateOpNode(&fused_op_desc); + IR_NODE_LINK_TO(gn_x, fused_op); + IR_NODE_LINK_TO(gn_bias, fused_op); + IR_NODE_LINK_TO(gn_scale, fused_op); + IR_NODE_LINK_TO(fused_op, silu_out); + + delete_nodes.insert({gn, silu, gn_y, gn_mean, gn_variance}); + GraphSafeRemoveNodes(graph, delete_nodes); + found_subgraph_count++; + 
}; + + gpd(graph, handler); + AddStatis(found_subgraph_count); +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(group_norm_silu_xpu_fuse_pass, + paddle::framework::ir::GroupNormalizeSiluXPUFusePass); + +REGISTER_PASS_CAPABILITY(group_norm_silu_xpu_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination().EQ( + "group_norm_silu_xpu", 0)); diff --git a/paddle/fluid/framework/ir/xpu/qk_qkv_attention_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/qk_qkv_attention_xpu_fuse_pass.cc index 2ca1d081aab89..2d56306e97faa 100644 --- a/paddle/fluid/framework/ir/xpu/qk_qkv_attention_xpu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/xpu/qk_qkv_attention_xpu_fuse_pass.cc @@ -17,6 +17,7 @@ #include "glog/logging.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/framework/ir/quantize_helper.h" #include "paddle/fluid/framework/ir/xpu/pass_utils.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" @@ -212,6 +213,7 @@ void QkQkvAttentionXPUFusePass::ApplyQkQkvAttentionXPUFuse( GET_IR_NODE(output); // Generate fuse op + auto* scope = param_scope(); auto* block = reshape_1->Op()->Block(); framework::OpDesc fused_op_desc(block); fused_op_desc.SetType("qkv_attention_xpu"); @@ -219,6 +221,57 @@ void QkQkvAttentionXPUFusePass::ApplyQkQkvAttentionXPUFuse( fused_op_desc.SetInput("q", {input->Name()}); fused_op_desc.SetInput("k", {input->Name()}); fused_op_desc.SetInput("v", {input->Name()}); + std::unordered_map> var_quant_scales = + GetQuantInfoFromTheGraph(graph, "has_quant_info", "var_quant_scales"); + // recored q/k/v max, qk_max, and qkv_max + std::vector input_max_nodes; + if (var_quant_scales.find(input->Name()) != var_quant_scales.end() && + var_quant_scales.find(qk_matmul_out->Name()) != + var_quant_scales.end() && + var_quant_scales.find(qkv_matmul_out->Name()) != + var_quant_scales.end()) { + std::vector 
input_max_vec; + input_max_vec.push_back(var_quant_scales.at(input->Name())[0]); + input_max_vec.push_back(var_quant_scales.at(qk_matmul_out->Name())[0]); + input_max_vec.push_back(var_quant_scales.at(qkv_matmul_out->Name())[0]); + std::vector quant_max_names = { + "input_max", "qk_max", "qkv_max"}; + for (size_t i = 0; i < input_max_vec.size(); i++) { + std::string input_max_name = + input->Name() + "_" + std::to_string(i) + "_max_in"; + int max_ptr_size = phi::backends::xpu::get_xpu_max_ptr_size(-1); + VarDesc input_max_desc(input_max_name); + input_max_desc.SetPersistable(true); + input_max_desc.SetShape({static_cast(max_ptr_size)}); + input_max_desc.SetDataType(proto::VarType::Type::VarType_Type_FP32); + Node* input_max_in = graph->CreateVarNode(&input_max_desc); + auto* block_input_max_in_desc = block->Var(input_max_name); + block_input_max_in_desc->SetPersistable(input_max_desc.Persistable()); + block_input_max_in_desc->SetShape(input_max_desc.GetShape()); + block_input_max_in_desc->SetDataType(input_max_desc.GetDataType()); + + phi::DenseTensor input_max_in_cpu_tensor; + auto* cpu_ctx = static_cast( + platform::DeviceContextPool::Instance().Get(phi::CPUPlace())); + input_max_in_cpu_tensor.set_type(phi::DataType::FLOAT32); + input_max_in_cpu_tensor.Resize({max_ptr_size}); + std::vector input_max(max_ptr_size, input_max_vec[i]); + memcpy(cpu_ctx->Alloc(&input_max_in_cpu_tensor), + input_max.data(), + max_ptr_size * sizeof(float)); + Assign(input_max_in_cpu_tensor, + scope->Var(input_max_name)->GetMutable()); + if (i == 0) { + fused_op_desc.SetInput("q_max", {input_max_name}); + fused_op_desc.SetInput("k_max", {input_max_name}); + fused_op_desc.SetInput("v_max", {input_max_name}); + } else { + fused_op_desc.SetInput(quant_max_names[i], {input_max_name}); + } + input_max_nodes.push_back(input_max_in); + } + } + // set attributes of fuse_op if (with_q_scale) { float scale_val = PADDLE_GET_CONST(float, scale->Op()->GetAttr("scale")); @@ -239,16 +292,15 @@ void 
QkQkvAttentionXPUFusePass::ApplyQkQkvAttentionXPUFuse( fused_op_desc.SetAttr("out_dtype", input->Var()->GetDataType()); // set output of fuse_op - VarDesc fused_op_out_max_desc("qkv_max"); - Node* fused_op_out_max = graph->CreateVarNode(&fused_op_out_max_desc); - fused_op_desc.SetOutput("qkv_max", {"qkv_max"}); fused_op_desc.SetOutput("qkv", {output->Name()}); auto* fused_op = graph->CreateOpNode(&fused_op_desc); IR_NODE_LINK_TO(input, fused_op); IR_NODE_LINK_TO(fused_op, output); - IR_NODE_LINK_TO(fused_op, fused_op_out_max); + for (size_t i = 0; i < input_max_nodes.size(); i++) { + IR_NODE_LINK_TO(input_max_nodes[i], fused_op); + } // delete useless node std::unordered_set del_node_set; diff --git a/paddle/fluid/framework/ir/xpu/quant_dequant_xpu_pass.cc b/paddle/fluid/framework/ir/xpu/quant_dequant_xpu_pass.cc index 29a222281b217..eecefa6330d69 100644 --- a/paddle/fluid/framework/ir/xpu/quant_dequant_xpu_pass.cc +++ b/paddle/fluid/framework/ir/xpu/quant_dequant_xpu_pass.cc @@ -17,7 +17,7 @@ #include #include "paddle/fluid/framework/ir/graph_helper.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/ir/quantize_helper.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/framework/ir/xpu/spatial_transformer_resblock_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/spatial_transformer_resblock_xpu_fuse_pass.cc new file mode 100644 index 0000000000000..a80d3763c366d --- /dev/null +++ b/paddle/fluid/framework/ir/xpu/spatial_transformer_resblock_xpu_fuse_pass.cc @@ -0,0 +1,594 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "glog/logging.h" + +#include "paddle/fluid/framework/ir/fuse_pass_base.h" +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/framework/ir/pass.h" +#include "paddle/fluid/framework/ir/xpu/pass_utils.h" +#include "paddle/fluid/framework/ir/xpu/quant_utils.h" +#include "paddle/fluid/framework/op_version_registry.h" +#include "paddle/fluid/platform/enforce.h" + +namespace phi { +class DenseTensor; +} // namespace phi + +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +} // namespace paddle + +namespace paddle { +namespace framework { +namespace ir { +namespace patterns { + +/* +Fuse original subgraph into __xpu__spatial_transformer_resblock op. +Currently there are 3 different original patterns to match. 
+ +Original subgraph (situation 1):(todo) + + ------------Input1 Input2 + | | | + | group_norm silu + | | | + | silu _xpu_fc + | | | + | _xpu_conv2d unsqueeze + | \ / + | \ / + | \ / + | \ / + | elementwise_add + | | + | group_norm + | | + | silu + | | + | _xpu_conv2d + | | + |____________________elementwise_add + | + output + +Original subgraph (situation 2): + + -------------- in + | | + | group_norm_silu_xpu + | | + | conv2d_xpu + | | + | group_norm_silu_x pu + | | + -----------conv2d_xpu + | + out + +Original subgraph (situation 3): + + -------------- in + | | + | group_norm_silu_xpu + | | + | conv2d_xpu + | | + conv2d_xpu group_norm_silu_xpu + | | + -----------conv2d_xpu + | + out + +Fuse to: +(Situation 1):(todo) + Input1 Input2 + \ / + spatial_transformer_resblock_xpu + | + output +or: +(Situation 2 and 3): + in + | + spatial_transformer_resblock_xpu + | + out +*/ +struct SpatialTransformerResBlockXPUPattern : public PatternBase { + SpatialTransformerResBlockXPUPattern(PDPattern* pattern, + const std::string& name_scope, + bool conv_fix = false, + bool input_max = false, + bool has_silu_fc_input = false, + bool include_silu = false); + // declare operator node's name + PATTERN_DECL_NODE(gn_silu_0); + PATTERN_DECL_NODE(conv2d_0); + PATTERN_DECL_NODE(gn_silu_1); + PATTERN_DECL_NODE(conv2d_1); + PATTERN_DECL_NODE(conv2d_2); + // declare variable node's name + PATTERN_DECL_NODE(gn_silu_0_x); + PATTERN_DECL_NODE(gn_silu_0_bias); + PATTERN_DECL_NODE(gn_silu_0_scale); + PATTERN_DECL_NODE(gn_silu_0_out); + PATTERN_DECL_NODE(conv2d_0_bias); + PATTERN_DECL_NODE(conv2d_0_filter); + PATTERN_DECL_NODE(conv2d_0_filter_max); + PATTERN_DECL_NODE(conv2d_0_out); + PATTERN_DECL_NODE(conv2d_0_out_max); + PATTERN_DECL_NODE(gn_silu_1_bias); + PATTERN_DECL_NODE(gn_silu_1_scale); + PATTERN_DECL_NODE(gn_silu_1_out); + PATTERN_DECL_NODE(conv2d_1_bias); + PATTERN_DECL_NODE(conv2d_1_filter); + PATTERN_DECL_NODE(conv2d_1_filter_max); + PATTERN_DECL_NODE(conv2d_1_out); + 
PATTERN_DECL_NODE(conv2d_1_out_max); + PATTERN_DECL_NODE(conv2d_2_x_max); + PATTERN_DECL_NODE(conv2d_2_bias); + PATTERN_DECL_NODE(conv2d_2_filter); + PATTERN_DECL_NODE(conv2d_2_filter_max); + PATTERN_DECL_NODE(conv2d_2_out); + PATTERN_DECL_NODE(conv2d_2_out_max); + + private: + bool conv_fix_{false}; + bool input_max_{false}; + bool has_silu_fc_input_{false}; + bool include_silu_{false}; +}; + +SpatialTransformerResBlockXPUPattern::SpatialTransformerResBlockXPUPattern( + PDPattern* pattern, + const std::string& name_scope, + bool conv_fix, + bool input_max, + bool has_silu_fc_input, + bool include_silu) + : PatternBase(pattern, name_scope, name_scope), + conv_fix_(conv_fix), + input_max_(input_max), + has_silu_fc_input_(has_silu_fc_input), + include_silu_(include_silu) { + // gn_silu_0 + auto gn_silu_0 = + pattern->NewNode(gn_silu_0_repr())->assert_is_op("group_norm_silu_xpu"); + auto gn_silu_0_x = pattern->NewNode(gn_silu_0_x_repr()) + ->assert_is_op_input("group_norm_silu_xpu", "x") + ->AsInput(); + auto gn_silu_0_bias = pattern->NewNode(gn_silu_0_bias_repr()) + ->assert_is_op_input("group_norm_silu_xpu", "bias") + ->AsInput(); + auto gn_silu_0_scale = + pattern->NewNode(gn_silu_0_scale_repr()) + ->assert_is_op_input("group_norm_silu_xpu", "scale") + ->AsInput(); + auto gn_silu_0_out = pattern->NewNode(gn_silu_0_out_repr()) + ->assert_is_op_output("group_norm_silu_xpu", "out") + ->assert_is_op_input("conv2d_xpu", "x") + ->assert_has_n_outputs(1); + gn_silu_0->LinksFrom({gn_silu_0_x, gn_silu_0_bias, gn_silu_0_scale}) + .LinksTo({gn_silu_0_out}); + + PDNode* conv2d_2_x_max = nullptr; + PDNode* conv2d_2_bias = nullptr; + PDNode* conv2d_2_filter = nullptr; + PDNode* conv2d_2_filter_max = nullptr; + PDNode* conv2d_2_out = nullptr; + PDNode* conv2d_2_out_max = nullptr; + if (conv_fix_) { + gn_silu_0_x->assert_is_op_input("conv2d_xpu", "x"); // conv2d_2 x + if (input_max_) { + conv2d_2_x_max = pattern->NewNode(conv2d_2_x_max_repr()) + ->assert_is_op_input("conv2d_xpu", 
"x_max") + ->AsInput(); + } + // conv2d_2 + auto conv2d_2 = + pattern->NewNode(conv2d_2_repr())->assert_is_op("conv2d_xpu"); + conv2d_2_bias = pattern->NewNode(conv2d_2_bias_repr()) + ->assert_is_op_input("conv2d_xpu", "bias") + ->AsInput(); + conv2d_2_filter = pattern->NewNode(conv2d_2_filter_repr()) + ->assert_is_op_input("conv2d_xpu", "filter") + ->AsInput(); + conv2d_2_filter_max = pattern->NewNode(conv2d_2_filter_max_repr()) + ->assert_is_op_input("conv2d_xpu", "filter_max") + ->AsInput(); + conv2d_2_out = pattern->NewNode(conv2d_2_out_repr()) + ->assert_is_op_output("conv2d_xpu", "out") + ->assert_is_op_input("conv2d_xpu", "branch") + ->assert_has_n_outputs(1); + conv2d_2_out_max = pattern->NewNode(conv2d_2_out_max_repr()) + ->assert_is_op_output("conv2d_xpu", "out_max"); + std::vector conv2d_2_input{ + gn_silu_0_x, conv2d_2_bias, conv2d_2_filter, conv2d_2_filter_max}; + if (input_max_) { + conv2d_2_input.push_back(conv2d_2_x_max); + } + conv2d_2->LinksFrom(conv2d_2_input) + .LinksTo({conv2d_2_out, conv2d_2_out_max}); + } else { + gn_silu_0_x->assert_is_op_input("conv2d_xpu", "branch"); // conv2d_1 branch + conv2d_2_out = gn_silu_0_x; + } + + // conv2d_0 + auto conv2d_0 = pattern->NewNode(conv2d_0_repr())->assert_is_op("conv2d_xpu"); + auto conv2d_0_bias = pattern->NewNode(conv2d_0_bias_repr()) + ->assert_is_op_input("conv2d_xpu", "bias") + ->AsInput(); + auto conv2d_0_filter = pattern->NewNode(conv2d_0_filter_repr()) + ->assert_is_op_input("conv2d_xpu", "filter") + ->AsInput(); + auto conv2d_0_filter_max = + pattern->NewNode(conv2d_0_filter_max_repr()) + ->assert_is_op_input("conv2d_xpu", "filter_max") + ->AsInput(); + auto conv2d_0_out = pattern->NewNode(conv2d_0_out_repr()) + ->assert_is_op_output("conv2d_xpu", "out") + ->assert_is_op_input("group_norm_silu_xpu", "x") + ->assert_has_n_outputs(1); + auto conv2d_0_out_max = pattern->NewNode(conv2d_0_out_max_repr()) + ->assert_is_op_output("conv2d_xpu", "out_max"); + conv2d_0 + ->LinksFrom( + {gn_silu_0_out, 
conv2d_0_bias, conv2d_0_filter, conv2d_0_filter_max}) + .LinksTo({conv2d_0_out, conv2d_0_out_max}); + + // gn_silu_1 + auto gn_silu_1 = + pattern->NewNode(gn_silu_1_repr())->assert_is_op("group_norm_silu_xpu"); + auto gn_silu_1_bias = pattern->NewNode(gn_silu_1_bias_repr()) + ->assert_is_op_input("group_norm_silu_xpu", "bias") + ->assert_is_persistable_var() + ->AsInput(); + auto gn_silu_1_scale = + pattern->NewNode(gn_silu_1_scale_repr()) + ->assert_is_op_input("group_norm_silu_xpu", "scale") + ->assert_is_persistable_var() + ->AsInput(); + auto gn_silu_1_out = pattern->NewNode(gn_silu_1_out_repr()) + ->assert_is_op_output("group_norm_silu_xpu", "out") + ->assert_is_op_input("conv2d_xpu", "x") + ->assert_has_n_outputs(1); + gn_silu_1->LinksFrom({conv2d_0_out, gn_silu_1_bias, gn_silu_1_scale}) + .LinksTo({gn_silu_1_out}); + + // conv2d_1 + auto conv2d_1 = pattern->NewNode(conv2d_1_repr())->assert_is_op("conv2d_xpu"); + auto conv2d_1_bias = pattern->NewNode(conv2d_1_bias_repr()) + ->assert_is_op_input("conv2d_xpu", "bias") + ->AsInput(); + auto conv2d_1_filter = pattern->NewNode(conv2d_1_filter_repr()) + ->assert_is_op_input("conv2d_xpu", "filter") + ->AsInput(); + auto conv2d_1_filter_max = + pattern->NewNode(conv2d_1_filter_max_repr()) + ->assert_is_op_input("conv2d_xpu", "filter_max") + ->AsInput(); + auto conv2d_1_out = pattern->NewNode(conv2d_1_out_repr()) + ->assert_is_op_output("conv2d_xpu", "out"); + auto conv2d_1_out_max = pattern->NewNode(conv2d_1_out_max_repr()) + ->assert_is_op_output("conv2d_xpu", "out_max"); + conv2d_1 + ->LinksFrom({gn_silu_1_out, + conv2d_2_out, + conv2d_1_bias, + conv2d_1_filter, + conv2d_1_filter_max}) + .LinksTo({conv2d_1_out, conv2d_1_out_max}); +} + +} // namespace patterns + +namespace { +static std::vector IntVec2DTo1D(const std::vector>& vec) { + std::vector res; + for (const auto& v : vec) { + for (const auto& ele : v) { + res.emplace_back(ele); + } + } + return res; +} + +} // namespace + +class 
SpatialTransformerResBlockXPUFusePass : public FusePassBase { + protected: + void ApplyImpl(ir::Graph* graph) const override; + + private: + int FuseSpatialTransformerResBlock(ir::Graph* graph, + bool conv_fix = false, + bool input_max = false, + bool has_silu_fc_input = false, + bool include_silu = false) const; + + const std::string name_scope_{"spatial_transformer_resblock_xpu_fuse_pass"}; +}; + +void SpatialTransformerResBlockXPUFusePass::ApplyImpl(ir::Graph* graph) const { + PADDLE_ENFORCE_NOT_NULL( + graph, platform::errors::PreconditionNotMet("graph should not be null.")); + Init(name_scope_, graph); + int found_subgraph_count = 0; + for (auto conv_fix : {false, true}) { + for (auto has_silu_fc_input : {false}) { + for (auto include_silu : {false}) { + if (conv_fix == true) { + for (auto input_max : {true, false}) { + found_subgraph_count += + FuseSpatialTransformerResBlock(graph, + conv_fix /*true*/, + input_max, + has_silu_fc_input, + include_silu); + } + } else { + found_subgraph_count += + FuseSpatialTransformerResBlock(graph, + conv_fix /*false*/, + false, + has_silu_fc_input, + include_silu); + } + } + } + } + + AddStatis(found_subgraph_count); +} + +int SpatialTransformerResBlockXPUFusePass::FuseSpatialTransformerResBlock( + ir::Graph* graph, + bool conv_fix, + bool input_max, + bool has_silu_fc_input, + bool include_silu) const { + GraphPatternDetector gpd; + patterns::SpatialTransformerResBlockXPUPattern pattern(gpd.mutable_pattern(), + name_scope_, + conv_fix, + input_max, + has_silu_fc_input, + include_silu); + + int found_subgraph_count = 0; + auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, + Graph* graph) { + VLOG(4) << "handle SpatialTransformerResBlockXPUFusePass fuse"; + // declare operator node's name + GET_IR_NODE(gn_silu_0); + GET_IR_NODE(conv2d_0); + GET_IR_NODE(gn_silu_1); + GET_IR_NODE(conv2d_1); + GET_IR_NODE(conv2d_2); + // declare variable node's name + GET_IR_NODE(gn_silu_0_x); + GET_IR_NODE(gn_silu_0_bias); + 
GET_IR_NODE(gn_silu_0_scale); + GET_IR_NODE(gn_silu_0_out); + GET_IR_NODE(conv2d_0_bias); + GET_IR_NODE(conv2d_0_filter); + GET_IR_NODE(conv2d_0_filter_max); + GET_IR_NODE(conv2d_0_out); + GET_IR_NODE(conv2d_0_out_max); + GET_IR_NODE(gn_silu_1_bias); + GET_IR_NODE(gn_silu_1_scale); + GET_IR_NODE(gn_silu_1_out); + GET_IR_NODE(conv2d_1_bias); + GET_IR_NODE(conv2d_1_filter); + GET_IR_NODE(conv2d_1_filter_max); + GET_IR_NODE(conv2d_1_out); + GET_IR_NODE(conv2d_1_out_max); + GET_IR_NODE(conv2d_2_x_max); + GET_IR_NODE(conv2d_2_bias); + GET_IR_NODE(conv2d_2_filter); + GET_IR_NODE(conv2d_2_filter_max); + GET_IR_NODE(conv2d_2_out); + GET_IR_NODE(conv2d_2_out_max); + + auto* block = gn_silu_1->Op()->Block(); + auto* scope = param_scope(); + PADDLE_ENFORCE_NOT_NULL( + scope, platform::errors::InvalidArgument("Scope cannot be nullptr.")); + // delete useless node + std::unordered_set delete_nodes; + + std::vector> strides; + std::vector> paddings; + std::vector> dilations; + std::vector groups; + std::vector gn_eps; + std::vector gn_groups; + + // get attr + float gn_silu_0_eps = + PADDLE_GET_CONST(float, gn_silu_0->Op()->GetAttr("epsilon")); + gn_eps.emplace_back(std::move(gn_silu_0_eps)); + int gn_silu_0_groups = + PADDLE_GET_CONST(int, gn_silu_0->Op()->GetAttr("groups")); + gn_groups.emplace_back(std::move(gn_silu_0_groups)); + float gn_silu_1_eps = + PADDLE_GET_CONST(float, gn_silu_1->Op()->GetAttr("epsilon")); + gn_eps.emplace_back(std::move(gn_silu_1_eps)); + int gn_silu_1_groups = + PADDLE_GET_CONST(int, gn_silu_1->Op()->GetAttr("groups")); + gn_groups.emplace_back(std::move(gn_silu_1_groups)); + + // conv2d_0 + auto conv2d_0_dilations = PADDLE_GET_CONST( + std::vector, conv2d_0->Op()->GetAttr("dilations")); + dilations.emplace_back(std::move(conv2d_0_dilations)); + int conv2d_0_groups = + PADDLE_GET_CONST(int, conv2d_0->Op()->GetAttr("groups")); + groups.emplace_back(std::move(conv2d_0_groups)); + auto conv2d_0_paddings = + PADDLE_GET_CONST(std::vector, 
conv2d_0->Op()->GetAttr("paddings")); + paddings.emplace_back(std::move(conv2d_0_paddings)); + std::string conv2d_0_padding_algorithm = PADDLE_GET_CONST( + std::string, conv2d_0->Op()->GetAttr("padding_algorithm")); + auto conv2d_0_strides = + PADDLE_GET_CONST(std::vector, conv2d_0->Op()->GetAttr("strides")); + strides.emplace_back(std::move(conv2d_0_strides)); + + // conv2d_1 + auto conv2d_1_dilations = PADDLE_GET_CONST( + std::vector, conv2d_1->Op()->GetAttr("dilations")); + dilations.emplace_back(std::move(conv2d_1_dilations)); + int conv2d_1_groups = + PADDLE_GET_CONST(int, conv2d_1->Op()->GetAttr("groups")); + groups.emplace_back(std::move(conv2d_1_groups)); + auto conv2d_1_paddings = + PADDLE_GET_CONST(std::vector, conv2d_1->Op()->GetAttr("paddings")); + paddings.emplace_back(std::move(conv2d_1_paddings)); + std::string conv2d_1_padding_algorithm = PADDLE_GET_CONST( + std::string, conv2d_1->Op()->GetAttr("padding_algorithm")); + auto conv2d_1_strides = + PADDLE_GET_CONST(std::vector, conv2d_1->Op()->GetAttr("strides")); + strides.emplace_back(std::move(conv2d_1_strides)); + + std::vector conv_bias_names{conv2d_0_bias->Name(), + conv2d_1_bias->Name()}; + std::vector conv_filter_names{conv2d_0_filter->Name(), + conv2d_1_filter->Name()}; + std::vector conv_filter_max_names{conv2d_0_filter_max->Name(), + conv2d_1_filter_max->Name()}; + + // conv2d_2 + std::string conv2d_2_padding_algorithm; + if (conv_fix) { + auto conv2d_2_dilations = PADDLE_GET_CONST( + std::vector, conv2d_2->Op()->GetAttr("dilations")); + dilations.emplace_back(std::move(conv2d_2_dilations)); + int conv2d_2_groups = + PADDLE_GET_CONST(int, conv2d_2->Op()->GetAttr("groups")); + groups.emplace_back(std::move(conv2d_2_groups)); + auto conv2d_2_paddings = PADDLE_GET_CONST( + std::vector, conv2d_2->Op()->GetAttr("paddings")); + paddings.emplace_back(std::move(conv2d_2_paddings)); + conv2d_2_padding_algorithm = PADDLE_GET_CONST( + std::string, conv2d_2->Op()->GetAttr("padding_algorithm")); + auto 
conv2d_2_strides = PADDLE_GET_CONST( + std::vector, conv2d_2->Op()->GetAttr("strides")); + strides.emplace_back(std::move(conv2d_2_strides)); + + conv_bias_names.emplace_back(std::move(conv2d_2_bias->Name())); + conv_filter_names.emplace_back(std::move(conv2d_2_filter->Name())); + conv_filter_max_names.emplace_back( + std::move(conv2d_2_filter_max->Name())); + } + + std::string fused_op_out_name; + fused_op_out_name = conv2d_1_out->Name(); + // Generate add_layernorm fused op + framework::OpDesc fused_op_desc(block); + + fused_op_desc.SetType("spatial_transformer_resblock_xpu"); + // set attrs for fused op + fused_op_desc.SetInput("x", {gn_silu_0_x->Name()}); + + if (input_max) { + fused_op_desc.SetInput("x_max", {conv2d_2_x_max->Name()}); + } else { + fused_op_desc.SetInput("x_max", {}); + } + + fused_op_desc.SetInput("conv_bias", conv_bias_names); + fused_op_desc.SetInput("conv_filter", conv_filter_names); + fused_op_desc.SetInput("conv_filter_max", conv_filter_max_names); + fused_op_desc.SetInput("gn_bias", + {gn_silu_0_bias->Name(), gn_silu_1_bias->Name()}); + fused_op_desc.SetInput("gn_scale", + {gn_silu_0_scale->Name(), gn_silu_1_scale->Name()}); + fused_op_desc.SetOutput("out", {fused_op_out_name}); + fused_op_desc.SetOutput("out_max", {conv2d_1_out_max->Name()}); + + fused_op_desc.SetAttr("dilations", IntVec2DTo1D(dilations)); + fused_op_desc.SetAttr("paddings", IntVec2DTo1D(paddings)); + fused_op_desc.SetAttr("strides", IntVec2DTo1D(strides)); + fused_op_desc.SetAttr("groups", groups); + fused_op_desc.SetAttr("gn_eps", gn_eps); + fused_op_desc.SetAttr("gn_groups", gn_groups); + fused_op_desc.SetAttr("conv_fix", conv_fix); + fused_op_desc.SetAttr("has_silu_fc_input", has_silu_fc_input); + fused_op_desc.SetAttr("include_silu", include_silu); + + // relink fused op + auto* fused_op = graph->CreateOpNode(&fused_op_desc); + + IR_NODE_LINK_TO(gn_silu_0_x, fused_op); + IR_NODE_LINK_TO(gn_silu_0_bias, fused_op); + IR_NODE_LINK_TO(gn_silu_0_scale, fused_op); + 
IR_NODE_LINK_TO(conv2d_0_bias, fused_op); + IR_NODE_LINK_TO(conv2d_0_filter, fused_op); + IR_NODE_LINK_TO(conv2d_0_filter_max, fused_op); + IR_NODE_LINK_TO(gn_silu_1_bias, fused_op); + IR_NODE_LINK_TO(gn_silu_1_scale, fused_op); + IR_NODE_LINK_TO(conv2d_1_bias, fused_op); + IR_NODE_LINK_TO(conv2d_1_filter, fused_op); + IR_NODE_LINK_TO(conv2d_1_filter_max, fused_op); + + if (conv_fix) { + if (input_max) { + IR_NODE_LINK_TO(conv2d_2_x_max, fused_op); + } + IR_NODE_LINK_TO(conv2d_2_bias, fused_op); + IR_NODE_LINK_TO(conv2d_2_filter, fused_op); + IR_NODE_LINK_TO(conv2d_2_filter_max, fused_op); + } + + IR_NODE_LINK_TO(fused_op, conv2d_1_out); + IR_NODE_LINK_TO(fused_op, conv2d_1_out_max); + + delete_nodes.insert({gn_silu_0, + gn_silu_1, + conv2d_0, + conv2d_1, + gn_silu_0_out, + conv2d_0_out, + conv2d_0_out_max, + gn_silu_1_out}); + + if (conv_fix) { + delete_nodes.insert({conv2d_2, conv2d_2_out, conv2d_2_out_max}); + } + GraphSafeRemoveNodes(graph, delete_nodes); + found_subgraph_count++; + }; + + gpd(graph, handler); + return found_subgraph_count; +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(spatial_transformer_resblock_xpu_fuse_pass, + paddle::framework::ir::SpatialTransformerResBlockXPUFusePass); + +REGISTER_PASS_CAPABILITY(spatial_transformer_resblock_xpu_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination().EQ( + "spatial_transformer_resblock_xpu", 0)); diff --git a/paddle/fluid/framework/ir/xpu/xpu_quantize_op_pass.cc b/paddle/fluid/framework/ir/xpu/xpu_quantize_op_pass.cc index 8a319b8a350a0..381215b857303 100644 --- a/paddle/fluid/framework/ir/xpu/xpu_quantize_op_pass.cc +++ b/paddle/fluid/framework/ir/xpu/xpu_quantize_op_pass.cc @@ -164,7 +164,9 @@ void XPUQuantizeOpPass::QuantizeConv(ir::Graph* graph) const { out_var_node = output_node; } } - if (!AreScalesPresentForNodes(&var_quant_scales_, {x_var_node})) { + if (!AreScalesPresentForNodes(&var_quant_scales_, {x_var_node}) 
|| + w_var_node->Var()->GetDataType() != + proto::VarType::Type::VarType_Type_INT8) { VLOG(4) << "Skip quantize op: " << n->Name() << "x_var_node_name:" << x_var_node->Name() << " w_var_node_name:" << w_var_node->Name(); @@ -239,8 +241,9 @@ void XPUQuantizeOpPass::QuantizeFC(ir::Graph* graph) const { out_var_node = output_node; } } - if (!AreScalesPresentForNodes(&var_quant_scales_, - {x_var_node, w_var_node})) { + if (!AreScalesPresentForNodes(&var_quant_scales_, {x_var_node}) || + w_var_node->Var()->GetDataType() != + proto::VarType::Type::VarType_Type_INT8) { MarkAndLogCannotQuantizeOp(n, "No scale available for the operator"); continue; } @@ -261,6 +264,71 @@ void XPUQuantizeOpPass::QuantizeFC(ir::Graph* graph) const { } } +void XPUQuantizeOpPass::QuantizeQkvAttention(ir::Graph* graph) const { + for (auto* n : graph->Nodes()) { + if (n->IsOp()) { + auto* op = n->Op(); + if (op->Type() != "qkv_attention_xpu") { + continue; + } + std::vector max_node_names = { + "q_max", "k_max", "v_max", "qk_max"}; + std::unordered_map input_node_map; + for (auto* input_node : n->inputs) { + if (!input_node->IsVar()) { + continue; + } + for (auto input_name : op->InputNames()) { + if (op->Input(input_name)[0] == input_node->Var()->Name()) { + input_node_map[input_name] = input_node; + } + } + } + bool continue_flag = false; + for (auto max_name : max_node_names) { + if (input_node_map.find(max_name) == input_node_map.end()) { + continue_flag = true; + break; + } + } + if (continue_flag) { + continue; + } + Node* out_var_node = nullptr; + for (auto* output_node : n->outputs) { + if (!output_node->IsVar()) { + continue; + } + if (output_node->Var()->Name() == op->Output("qkv")[0]) { + out_var_node = output_node; + } + } + if (input_node_map["q"]->Name() == input_node_map["k"]->Name() && + input_node_map["q"]->Name() == input_node_map["v"]->Name()) { + QuantizeInput(graph, n, input_node_map["q"], "q"); + op->SetInput("k", op->Input("q")); + op->SetInput("v", op->Input("q")); + 
UnlinkNodes(input_node_map["k"], n); + UnlinkNodes(input_node_map["v"], n); + } else { + QuantizeInput(graph, n, input_node_map["q"], "q"); + QuantizeInput(graph, n, input_node_map["k"], "k"); + QuantizeInput(graph, n, input_node_map["v"], "v"); + } + auto has_output_scale = + AreScalesPresentForNodes(&var_quant_scales_, {out_var_node}); + if (has_output_scale) { + DequantizeOutput(graph, n, out_var_node, "qkv"); + n->Op()->SetAttr( + "out_dtype", + static_cast(proto::VarType::Type::VarType_Type_INT8)); + } else { + n->Op()->SetAttr("out_dtype", + input_node_map["q"]->Var()->GetDataType()); + } + } + } +} void XPUQuantizeOpPass::ApplyImpl(ir::Graph* graph) const { VLOG(3) << "Insert quantize/dequantize op to the graph."; PADDLE_ENFORCE_NOT_NULL( @@ -273,6 +341,7 @@ void XPUQuantizeOpPass::ApplyImpl(ir::Graph* graph) const { GetQuantInfo(graph); QuantizeConv(graph); QuantizeFC(graph); + QuantizeQkvAttention(graph); } } // namespace ir diff --git a/paddle/fluid/framework/ir/xpu/xpu_quantize_op_pass.h b/paddle/fluid/framework/ir/xpu/xpu_quantize_op_pass.h index 28d0f42e76bde..312b6a540c8cc 100644 --- a/paddle/fluid/framework/ir/xpu/xpu_quantize_op_pass.h +++ b/paddle/fluid/framework/ir/xpu/xpu_quantize_op_pass.h @@ -38,6 +38,7 @@ class XPUQuantizeOpPass : public FusePassBase { protected: void ApplyImpl(Graph* graph) const override; void QuantizeConv(Graph* graph) const; + void QuantizeQkvAttention(Graph* graph) const; void QuantizeFC(Graph* graph) const; private: diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc index d3b74fb00c1c5..f37f8f0d6a1e8 100644 --- a/paddle/fluid/framework/naive_executor.cc +++ b/paddle/fluid/framework/naive_executor.cc @@ -24,7 +24,7 @@ #include "paddle/fluid/framework/variable_helper.h" #include "paddle/fluid/platform/denormal.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif #ifdef PADDLE_WITH_TENSORRT #include 
"paddle/fluid/operators/tensorrt/tensorrt_engine_op.h" diff --git a/paddle/fluid/framework/new_executor/CMakeLists.txt b/paddle/fluid/framework/new_executor/CMakeLists.txt index d06fdd8c4c7cd..01c6cd7c12a43 100644 --- a/paddle/fluid/framework/new_executor/CMakeLists.txt +++ b/paddle/fluid/framework/new_executor/CMakeLists.txt @@ -5,7 +5,7 @@ if(NOT (WITH_CINN)) ${CMAKE_CURRENT_SOURCE_DIR}/instruction/cinn_jit_instruction.cc) endif() -if(NOT WITH_MKLDNN) +if(NOT WITH_ONEDNN) list( REMOVE_ITEM standalone_executor_srcs @@ -54,6 +54,6 @@ cc_library( add_dependencies(standalone_executor xxhash framework_proto) -if(WITH_MKLDNN) - add_dependencies(standalone_executor mkldnn) +if(WITH_ONEDNN) + add_dependencies(standalone_executor onednn) endif() diff --git a/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.cc b/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.cc index 0730ef34f140b..8bd67fe50d698 100644 --- a/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.cc +++ b/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.cc @@ -38,7 +38,7 @@ #include "paddle/fluid/pir/dialect/operator/ir/manual_op.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif namespace paddle { diff --git a/paddle/fluid/framework/new_executor/instruction/control_flow/pylayer_instruction.cc b/paddle/fluid/framework/new_executor/instruction/control_flow/pylayer_instruction.cc index 56bf04227d49b..838f6dbce67b6 100644 --- a/paddle/fluid/framework/new_executor/instruction/control_flow/pylayer_instruction.cc +++ b/paddle/fluid/framework/new_executor/instruction/control_flow/pylayer_instruction.cc @@ -37,7 +37,7 @@ #include "paddle/fluid/pir/dialect/operator/ir/manual_op.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif namespace paddle { diff 
--git a/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.cc b/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.cc index e4cc8568bbf88..1385f1d357a3d 100644 --- a/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.cc +++ b/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.cc @@ -38,7 +38,7 @@ #include "paddle/fluid/pir/dialect/operator/ir/manual_op.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif namespace paddle { diff --git a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc index 3bc5893a162b3..00b5410247ddc 100644 --- a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc +++ b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc @@ -702,7 +702,7 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key, &arguments); } #ifdef PADDLE_WITH_DNNL - // For input that is Extra, only MKLDNN will use Extra Inputs + // For input that is Extra, only OneDNN will use Extra Inputs auto& extra_input_names = paddle::operators::ExtraInfoUtils::Instance().GetExtraInputNamesMap( op_with_kernel->Type()); diff --git a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc index 1e093f7247320..850a038ea790c 100644 --- a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc +++ b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc @@ -42,7 +42,7 @@ #include "paddle/phi/core/kernel_factory.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif #ifdef PADDLE_WITH_CUSTOM_DEVICE diff --git a/paddle/fluid/framework/new_executor/interpreter/static_build.cc 
b/paddle/fluid/framework/new_executor/interpreter/static_build.cc index 131f756bdb1d3..ac58f499e91ca 100644 --- a/paddle/fluid/framework/new_executor/interpreter/static_build.cc +++ b/paddle/fluid/framework/new_executor/interpreter/static_build.cc @@ -24,7 +24,7 @@ #include "paddle/fluid/platform/flags.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif COMMON_DECLARE_bool(cache_inference_while_scope); diff --git a/paddle/fluid/framework/new_executor/interpreter_base_impl.h b/paddle/fluid/framework/new_executor/interpreter_base_impl.h index 1d9bac63d7c15..1a6fe75fc518a 100644 --- a/paddle/fluid/framework/new_executor/interpreter_base_impl.h +++ b/paddle/fluid/framework/new_executor/interpreter_base_impl.h @@ -112,9 +112,9 @@ class InterpreterBaseImpl { virtual bool IsSharedResultsBuild() const = 0; - virtual void Build( - const std::vector& feed_names, - std::vector* op_func_nodes) = 0; + virtual void Build(const std::vector& feed_names, + std::vector* op_func_nodes, + bool switch_stream = false) = 0; virtual bool IsStaticBuild() const = 0; diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.cc b/paddle/fluid/framework/new_executor/pir_interpreter.cc index c2b234d8d667f..8301a4e8c985c 100644 --- a/paddle/fluid/framework/new_executor/pir_interpreter.cc +++ b/paddle/fluid/framework/new_executor/pir_interpreter.cc @@ -37,7 +37,7 @@ #include "paddle/fluid/framework/new_executor/instruction/onednn/onednn_instruction.h" #include "paddle/fluid/framework/new_executor/instruction/onednn/onednn_legacy_instruction.h" #include "paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_instruction.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif #include "paddle/fluid/platform/cuda_graph_with_memory_pool.h" @@ -1927,7 +1927,8 @@ Variable* PirInterpreter::DebugVar(const std::string& name) const { void 
PirInterpreter::Build( const std::vector& feed_names, - std::vector* op_func_nodes) { + std::vector* op_func_nodes, + bool switch_stream) { PADDLE_THROW(platform::errors::Unimplemented( "Build is not implemented in PirInterpreter.")); } diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.h b/paddle/fluid/framework/new_executor/pir_interpreter.h index 9901dcf421cdc..819bf7486d685 100644 --- a/paddle/fluid/framework/new_executor/pir_interpreter.h +++ b/paddle/fluid/framework/new_executor/pir_interpreter.h @@ -142,9 +142,9 @@ class PirInterpreter : public InterpreterBaseImpl { void CheckCUDAGraphBeforeRun(const std::vector& feed_names); void PrepareForCUDAGraphCapture(); - void Build( - const std::vector& feed_names, - std::vector* op_func_nodes) override; + void Build(const std::vector& feed_names, + std::vector* op_func_nodes, + bool switch_stream = false) override; bool IsStaticBuild() const override { return static_build_; } diff --git a/paddle/fluid/framework/new_executor/program_interpreter.cc b/paddle/fluid/framework/new_executor/program_interpreter.cc index 8991fd9c3a22d..0bca82f5016e1 100644 --- a/paddle/fluid/framework/new_executor/program_interpreter.cc +++ b/paddle/fluid/framework/new_executor/program_interpreter.cc @@ -29,7 +29,7 @@ #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif #include "paddle/fluid/platform/cuda_graph_with_memory_pool.h" #include "paddle/phi/backends/device_manager.h" @@ -150,7 +150,7 @@ FetchList ProgramInterpreter::Run(const std::vector& feed_names, is_in_op_profiling_mode_ = enable_op_profiling; std::vector op_func_nodes; - Build(feed_names, &op_func_nodes); + Build(feed_names, &op_func_nodes, switch_stream); if (!is_build_) { SetFeedVarsInplaceSkip(feed_names); @@ -166,7 +166,7 @@ FetchList ProgramInterpreter::Run(const std::vector& feed_names, 
} else { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) if (switch_stream) { - BuildOpFuncNode(&op_func_nodes); + Convert(&op_func_nodes); } #endif RunImpl(); @@ -208,7 +208,8 @@ FetchList ProgramInterpreter::Run(const std::vector& feed_names, void ProgramInterpreter::Build( const std::vector& feed_names, - std::vector* op_func_nodes) { + std::vector* op_func_nodes, + bool switch_stream) { SetDeviceId(place_); CheckCUDAGraphBeforeRun(feed_names); @@ -216,7 +217,7 @@ void ProgramInterpreter::Build( platform::AttachPointerHashToMKLDNNKey(this, place_); #endif - if (!is_build_) { + if (!is_build_ || switch_stream) { LOG_FIRST_N(INFO, 1) << "New Executor is Running."; paddle::framework::interpreter::BuildVariableScope( block_, execution_config_, &var_scope_); @@ -678,7 +679,42 @@ std::tuple ProgramInterpreter::InterpreterRunTime() { void ProgramInterpreter::Convert( std::vector* op_func_nodes) { auto& vec_meta_info = var_scope_.MutableVecMetaInfo(); - BuildOpFuncNode(op_func_nodes); + auto nodes = *op_func_nodes; + auto op_nums = nodes.size(); + vec_instruction_.clear(); + vec_instruction_.reserve(op_nums); + for (size_t op_idx = 0; op_idx < op_nums; ++op_idx) { + auto& op_func_node = nodes[op_idx]; + stream_analyzer_.SetForceEventsToWaitInfo(force_events_to_wait_); + auto* dev_ctx_ = stream_analyzer_.ParseDeviceContext(op_func_node); +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (FLAGS_new_executor_use_cuda_graph) { + auto& op = op_func_node.operator_base_; + auto& op_type = op->Type(); + if (op_type == interpreter::kMemcpyD2H || + op_type == interpreter::kMemcpyH2D) { + PADDLE_THROW(paddle::platform::errors::Fatal( + "Cuda memory copy d2h/h2d is not allowed while using cuda graph.")); + } + PADDLE_ENFORCE_EQ(typeid(*dev_ctx_) == typeid(phi::GPUContext), + true, + platform::errors::InvalidArgument( + "Device context of op %s must be [%s] while using " + "cuda graph, but got [%s].", + op_type, + typeid(phi::GPUContext).name(), + 
typeid(*dev_ctx_).name())); + // cuda graph needs to record all stream + phi::backends::gpu::CUDAGraphContextManager::Instance() + .RecordCapturingDeviceContext(dev_ctx_); + } +#endif + vec_instruction_.emplace_back(op_idx, std::move(op_func_node), *dev_ctx_); + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + vec_instruction_.back().UpdateRecordStreamForGcInfo(); +#endif + } BuildOperatorDependences(); @@ -715,7 +751,6 @@ void ProgramInterpreter::Convert( } // calculate last_live_ops_ - auto op_nums = (*op_func_nodes).size(); for (size_t op_idx = 0; op_idx < op_nums; ++op_idx) { Instruction& instr = vec_instruction_[op_idx]; OpInOutInfo info; @@ -852,46 +887,6 @@ void ProgramInterpreter::Convert( AnalyseExecuteOrderForTrace(); } -void ProgramInterpreter::BuildOpFuncNode( - std::vector* op_func_nodes) { - auto nodes = *op_func_nodes; - auto op_nums = nodes.size(); - vec_instruction_.clear(); - vec_instruction_.reserve(op_nums); - for (size_t op_idx = 0; op_idx < op_nums; ++op_idx) { - auto& op_func_node = nodes[op_idx]; - stream_analyzer_.SetForceEventsToWaitInfo(force_events_to_wait_); - auto* dev_ctx_ = stream_analyzer_.ParseDeviceContext(op_func_node); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (FLAGS_new_executor_use_cuda_graph) { - auto& op = op_func_node.operator_base_; - auto& op_type = op->Type(); - if (op_type == interpreter::kMemcpyD2H || - op_type == interpreter::kMemcpyH2D) { - PADDLE_THROW(paddle::platform::errors::Fatal( - "Cuda memory copy d2h/h2d is not allowed while using cuda graph.")); - } - PADDLE_ENFORCE_EQ(typeid(*dev_ctx_) == typeid(phi::GPUContext), - true, - platform::errors::InvalidArgument( - "Device context of op %s must be [%s] while using " - "cuda graph, but got [%s].", - op_type, - typeid(phi::GPUContext).name(), - typeid(*dev_ctx_).name())); - // cuda graph needs to record all stream - phi::backends::gpu::CUDAGraphContextManager::Instance() - .RecordCapturingDeviceContext(dev_ctx_); - } -#endif - 
vec_instruction_.emplace_back(op_idx, std::move(op_func_node), *dev_ctx_); - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - vec_instruction_.back().UpdateRecordStreamForGcInfo(); -#endif - } -} - void ProgramInterpreter::BuildSkipShareLoDInfo() { for (size_t i = 0; i < vec_instruction_.size(); ++i) { bool can_skip_lod = true; diff --git a/paddle/fluid/framework/new_executor/program_interpreter.h b/paddle/fluid/framework/new_executor/program_interpreter.h index 94a8af8197d11..f72faf54f2b1d 100644 --- a/paddle/fluid/framework/new_executor/program_interpreter.h +++ b/paddle/fluid/framework/new_executor/program_interpreter.h @@ -60,9 +60,9 @@ class ProgramInterpreter : public InterpreterBaseImpl { std::shared_ptr GetMutableCopyProgram() override; - void Build( - const std::vector& feed_names, - std::vector* op_func_nodes) override; + void Build(const std::vector& feed_names, + std::vector* op_func_nodes, + bool switch_stream = false) override; void ShareWorkQueueFrom(InterpreterBaseImpl* src) override; @@ -131,8 +131,6 @@ class ProgramInterpreter : public InterpreterBaseImpl { void BuildSkipShareLoDInfo(); void UpdateSyncOpNum(); void AnalyseExecuteOrderForTrace(); - void BuildOpFuncNode( - std::vector* op_func_nodes); // inplace void BuildInplace(); diff --git a/paddle/fluid/framework/new_executor/workqueue/event_count.h b/paddle/fluid/framework/new_executor/workqueue/event_count.h index 9f80b02904dad..6918cc5a42edd 100644 --- a/paddle/fluid/framework/new_executor/workqueue/event_count.h +++ b/paddle/fluid/framework/new_executor/workqueue/event_count.h @@ -121,7 +121,7 @@ class EventCount { CheckState(state, true); uint64_t newstate; if ((state & kSignalMask) != 0) { - // Consume the signal and return immidiately. + // Consume the signal and return immediately. newstate = state - kWaiterInc - kSignalInc; } else { // Remove this thread from pre-wait counter and add to the waiter stack. 
@@ -148,7 +148,7 @@ class EventCount { CheckState(state, true); uint64_t newstate = state - kWaiterInc; // We don't know if the thread was also notified or not, - // so we should not consume a signal unconditionaly. + // so we should not consume a signal unconditionally. // Only if number of waiters is equal to number of signals, // we know that the thread was notified and we must take away the signal. if (((state & kWaiterMask) >> kWaiterShift) == diff --git a/paddle/fluid/framework/op_kernel_type.h b/paddle/fluid/framework/op_kernel_type.h index ce0a138eb1a6a..4839592aa43b7 100644 --- a/paddle/fluid/framework/op_kernel_type.h +++ b/paddle/fluid/framework/op_kernel_type.h @@ -107,7 +107,7 @@ inline bool NeedTransformLayout(const DataLayout& l, const DataLayout& r) { bool ret = (l != DataLayout::kAnyLayout && r != DataLayout::kAnyLayout && l != r); #ifdef PADDLE_WITH_DNNL - // Layout transform needed for either non-MKLDNN to MKLDNN or vice versa + // Layout transform needed for either non-MKLDNN to OneDNN or vice versa ret |= (l != DataLayout::ONEDNN && r == DataLayout::ONEDNN); ret |= (l == DataLayout::ONEDNN && r != DataLayout::ONEDNN); #endif diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index fe10a16375f34..d5dab65d18d15 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -53,8 +53,8 @@ class DenseTensor; #endif #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/platform/mkldnn_op_list.h" +#include "paddle/fluid/platform/onednn_helper.h" +#include "paddle/fluid/platform/onednn_op_list.h" #endif #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) @@ -504,7 +504,7 @@ void RuntimeInferShapeContext::ShareLoD(const std::string& in, // Workaround: // Skip set_layout() when input layout is kMKLDNN // This is to avoid kMKLDNN is populated wrongly into a non-MKLDNN - // OPKernel. 
In all MKLDNN OPkernel, set_layout(kMKLDNN) should be called + // OPKernel. In all OneDNN OPkernel, set_layout(kMKLDNN) should be called // in Compute() if (in_tensor.layout() != DataLayout::ONEDNN) #endif @@ -1571,12 +1571,12 @@ bool OperatorWithKernel::SupportsKernelType( } #endif -// NOTE(jiahongyu): If MKLDNN can be used, the function SupportsKernelType needs -// to check whether current op supports MKLDNN kernel. There are three +// NOTE(jiahongyu): If OneDNN can be used, the function SupportsKernelType needs +// to check whether current op supports OneDNN kernel. There are three // statements in if condition: -// 1. Whether mkldnn kernel fallbacks to plain kernel; +// 1. Whether onednn kernel fallbacks to plain kernel; // 2. Whether this op has specific implementation; -// 3. Whether mkldnn kernel can be used. +// 3. Whether onednn kernel can be used. #ifdef PADDLE_WITH_DNNL if (!this->DnnFallback() && !paddle::platform::in_mkldnn_white_list(type_) && this->CanMKLDNNBeUsed(exe_ctx, kernel_type.data_type_)) { @@ -1771,7 +1771,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope, // TODO(chenweihang): Now we are still reusing a lot of the original fluid // implementation, this is a gradual replacement process // TODO(chenweihang): in the first phase of project, we only support CPU, CUDA - // and RCOM backend, the XPU, NPU and MKLDNN will be supported in the second + // and RCOM backend, the XPU, NPU and OneDNN will be supported in the second // phase phi::KernelKey phi_kernel_key; std::string phi_kernel_name; @@ -1846,13 +1846,13 @@ void OperatorWithKernel::RunImpl(const Scope& scope, } } else { phi_kernel_name = kernel_signature_->name; -// NOTE(jiahongyu): The registered MKLDNN kernel have library_type = +// NOTE(jiahongyu): The registered OneDNN kernel have library_type = // LibraryType::kMKLDNN and data_layout_ = DataLayout::ONEDNN. But the default // values are kPlain, so we need to modify the library_type and data_layout_ // here. 
There are three statements in if condition: -// 1. Whether mkldnn kernel fallbacks to plain kernel; +// 1. Whether onednn kernel fallbacks to plain kernel; // 2. Whether this op has specific implementation; -// 3. Whether mkldnn kernel can be used. +// 3. Whether onednn kernel can be used. #ifdef PADDLE_WITH_DNNL if (!this->DnnFallback() && !paddle::platform::in_mkldnn_white_list(type_) && @@ -2121,7 +2121,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope, } if (FLAGS_enable_unused_var_check) { - // skip op that uses mkldnn because it has different memory reuse strategy. + // skip op that uses onednn because it has different memory reuse strategy. // use attr here because some GradMakers (like ActivationGradOpMaker) add // input when use_mkldnn=true; if (!(HasAttr("use_mkldnn") && Attr("use_mkldnn"))) { @@ -2181,12 +2181,12 @@ OpKernelType OperatorWithKernel::InnerGetExpectedKernelType( framework::TransPhiKernelKeyToOpKernelType(phi_kernel_key); // NOTE(jiahongyu): PADDLE_WITH_DNNL codes are moved outside function -// GetExpectedKernelType, so that if MKLDNN can be used, the library_type_ and +// GetExpectedKernelType, so that if OneDNN can be used, the library_type_ and // data_layout_ of expected_kernel_key need to be adjusted. There are three // statements in if condition: -// 1. Whether mkldnn kernel fallbacks to plain kernel; +// 1. Whether onednn kernel fallbacks to plain kernel; // 2. Whether this op has specific implementation; -// 3. Whether mkldnn kernel can be used. +// 3. Whether onednn kernel can be used. 
#ifdef PADDLE_WITH_DNNL if (!this->DnnFallback() && !paddle::platform::in_mkldnn_white_list(type_) && this->CanMKLDNNBeUsed(ctx, expected_kernel_key.data_type_)) { @@ -2815,7 +2815,7 @@ Scope* OperatorWithKernel::PrepareData( prepare_input_data(input_name, &ins_vector, &in_def, should_skip_input); } #ifdef PADDLE_WITH_DNNL - // For input that is Extra, only MKLDNN will use Extra Inputs + // For input that is Extra, only OneDNN will use Extra Inputs auto& extra_input_names = paddle::operators::ExtraInfoUtils::Instance().GetExtraInputNamesMap( Type()); diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 3ad9ec6c9d698..dc025998cc099 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -921,7 +921,7 @@ class OperatorWithKernel : public OperatorBase { mutable std::mutex cache_update_mutex_; mutable bool enable_cache_transfer_scope_ = false; // NOTE(jiahongyu): Whether fallback to plain kernel after calling - // GetExpectedKernelType, use this bool flag to solve mkldnn and cudnn hard + // GetExpectedKernelType, use this bool flag to solve onednn and cudnn hard // code mutable bool dnn_fallback_ = false; // NOTE(chenweihang): Similar op members are used to adapt to diff --git a/paddle/fluid/framework/paddle2cinn/CMakeLists.txt b/paddle/fluid/framework/paddle2cinn/CMakeLists.txt index 4a0a869b8a2bd..2e4e5083caa36 100644 --- a/paddle/fluid/framework/paddle2cinn/CMakeLists.txt +++ b/paddle/fluid/framework/paddle2cinn/CMakeLists.txt @@ -17,8 +17,8 @@ set(paddle2cinn_deps auto_schedule_proto parallel_executor common) -if(WITH_MKLDNN) - set(paddle2cinn ${paddle2cinn} mkldnn) +if(WITH_ONEDNN) + set(paddle2cinn ${paddle2cinn} onednn) endif() if(WITH_TESTING) diff --git a/paddle/fluid/imperative/CMakeLists.txt b/paddle/fluid/imperative/CMakeLists.txt index 31ab7e1b1bcaa..4b5051a8aadd0 100644 --- a/paddle/fluid/imperative/CMakeLists.txt +++ b/paddle/fluid/imperative/CMakeLists.txt @@ -79,7 +79,8 @@ 
cc_library( layout_autotune ops_extra_info phi - common) + common + global_utils) cc_library( basic_engine SRCS basic_engine.cc diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc index 5192e8c773888..a3c5b51b80b3b 100644 --- a/paddle/fluid/imperative/layer.cc +++ b/paddle/fluid/imperative/layer.cc @@ -27,7 +27,7 @@ #include "paddle/fluid/platform/profiler.h" #include "paddle/phi/kernels/funcs/math_function.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif COMMON_DECLARE_bool(use_mkldnn); diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index a60c81a4c22d9..9f4f46c60cea4 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -26,7 +26,7 @@ #include "paddle/fluid/platform/device/xpu/xpu_op_list.h" #endif #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_op_list.h" +#include "paddle/fluid/platform/onednn_op_list.h" #endif #include "paddle/common/flags.h" #include "paddle/fluid/framework/library_type.h" @@ -166,7 +166,7 @@ PreparedOp PrepareImpl( auto* dev_ctx = pool.Get(place); #ifdef PADDLE_WITH_DNNL - // MKLDNN variant of code reads attributes in some of GetKernelTypeForVar and + // OneDNN variant of code reads attributes in some of GetKernelTypeForVar and // GetKernelType functions, so we need to copy the attributes there. // Const qualifier of Attrs had to be discarded to overwrite it. if (FLAGS_use_mkldnn) { @@ -190,13 +190,13 @@ PreparedOp PrepareImpl( phi::KernelSignature kernel_signature; std::string phi_kernel_name; -// NOTE(jiahongyu): The registered MKLDNN kernel have library_type = +// NOTE(jiahongyu): The registered OneDNN kernel have library_type = // LibraryType::kMKLDNN and data_layout_ = DataLayout::ONEDNN. But the default // values are kPlain, so we need to modify the library_type and data_layout_ // here. 
There are three statements in if condition: -// 1. Whether mkldnn kernel fallbacks to plain kernel; +// 1. Whether onednn kernel fallbacks to plain kernel; // 2. Whether this op has specific implementation; -// 3. Whether mkldnn kernel can be used. +// 3. Whether onednn kernel can be used. #ifdef PADDLE_WITH_DNNL if (!op.DnnFallback() && !paddle::platform::in_mkldnn_white_list(op.Type()) && op.CanMKLDNNBeUsed(dygraph_exe_ctx, expected_kernel_key.dtype())) { diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc index 3eff589fee703..7aa4652ec0058 100644 --- a/paddle/fluid/imperative/tracer.cc +++ b/paddle/fluid/imperative/tracer.cc @@ -36,8 +36,8 @@ #include "paddle/utils/string/string_helper.h" COMMON_DECLARE_bool(use_mkldnn); -COMMON_DECLARE_string(tracer_mkldnn_ops_on); -COMMON_DECLARE_string(tracer_mkldnn_ops_off); +COMMON_DECLARE_string(tracer_onednn_ops_on); +COMMON_DECLARE_string(tracer_onednn_ops_off); COMMON_DECLARE_bool(use_stride_kernel); namespace paddle { @@ -245,12 +245,12 @@ void Tracer::TraceOpImpl(const std::string& type, // if both lists are empty all ops are enabled (default for // FLAGS_use_mkldnn=1) // if ops_on list is not empty only ops from that list are enabled - if (!FLAGS_tracer_mkldnn_ops_on.empty()) { - auto is_on = FLAGS_tracer_mkldnn_ops_on.find(type) != std::string::npos; + if (!FLAGS_tracer_onednn_ops_on.empty()) { + auto is_on = FLAGS_tracer_onednn_ops_on.find(type) != std::string::npos; attrs["use_mkldnn"] = is_on; } else { // if ops_on list is empty all ops are enabled except types from off_list - auto is_off = FLAGS_tracer_mkldnn_ops_off.find(type) != std::string::npos; + auto is_off = FLAGS_tracer_onednn_ops_off.find(type) != std::string::npos; attrs["use_mkldnn"] = !is_off; } } diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index 77052155efaa6..1f3544bf702b4 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ 
b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -132,7 +132,7 @@ void IRPassManager::CreatePasses(Argument *argument, pass->Set("graph_viz_path", new std::string(std::move(dot_file_path))); pass->Set("optim_cache_dir", new std::string(std::move(optim_cache_dir))); pass_num++; - } else if (pass_name == "mkldnn_placement_pass") { + } else if (pass_name == "onednn_placement_pass") { pass->Set("mkldnn_enabled_op_types", new std::unordered_set( argument->mkldnn_enabled_op_types())); @@ -364,13 +364,13 @@ void IRPassManager::CreatePasses(Argument *argument, argument->nnadapter_model_cache_token())); } else if (pass_name == "fc_fuse_pass") { pass->Set("use_gpu", new bool(argument->use_gpu())); - bool fc_mkldnn_pass = false; + bool fc_onednn_pass = false; for (const std::string &pass_n : passes) { - if (pass_n == "fc_mkldnn_pass") { - fc_mkldnn_pass = true; + if (pass_n == "fc_onednn_pass") { + fc_onednn_pass = true; } } - bool use_fc_padding = !fc_mkldnn_pass && argument->use_fc_padding(); + bool use_fc_padding = !fc_onednn_pass && argument->use_fc_padding(); pass->Set("use_fc_padding", new bool(use_fc_padding)); } else if (pass_name == "fused_multi_transformer_xpu_pass") { int quant_post_dynamic_weight_precision = diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index 65a4bea5b1240..c559b6d7e8897 100755 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -4,12 +4,12 @@ endif() add_subdirectory(details) -if(WITH_MKLDNN) +if(WITH_ONEDNN) set(mkldnn_quantizer_cfg mkldnn_quantizer_config) - set(mkldnn_quantizer_src ${CMAKE_CURRENT_SOURCE_DIR}/mkldnn_quantizer.cc) + set(mkldnn_quantizer_src ${CMAKE_CURRENT_SOURCE_DIR}/onednn_quantizer.cc) cc_library( ${mkldnn_quantizer_cfg} - SRCS mkldnn_quantizer_config.cc + SRCS onednn_quantizer_config.cc DEPS lod_tensor paddle_pass_builder) set(mkldnn_quantizer_cfg ${mkldnn_quantizer_cfg} diff --git 
a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index efe7b83f7df16..b8570fa05e7c4 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -505,7 +505,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { CP_MEMBER(dlnne_precision_mode_); CP_MEMBER(dlnne_disable_nodes_by_outputs_); CP_MEMBER(dlnne_input_shape_dict_); - // MKLDNN related. + // OneDNN related. CP_MEMBER(use_mkldnn_); CP_MEMBER(mkldnn_enabled_op_types_); CP_MEMBER(mkldnn_cache_capacity_); @@ -991,18 +991,18 @@ void AnalysisConfig::Update() { #ifdef PADDLE_WITH_DNNL // Since EnableMKLDNN is default, the pass_builder has created in the first // time. - // Case1: User manually disable mkldnn after pass_builder + // Case1: User manually disable onednn after pass_builder // create.(config.disable_mkldnn()) // Case2: User device is gpu/ipu/xpu, use // EnableXpu(), EnableCUDNN(), PassStrategy has been reset in the above code // block // Case3: pass_builder_ has been created and belongs to - // GpuPassStrategy(or IpuPassStrategy), neither enable mkldnn and - // disable mkldnn will be executed + // GpuPassStrategy(or IpuPassStrategy), neither enable onednn and + // disable onednn will be executed if ((!use_gpu() && !use_xpu() && !use_ipu() && !use_mkldnn_) || (use_mkldnn_ && !phi::backends::cpu::MayIUse(phi::backends::cpu::cpu_isa_t::avx2))) { - // User manually disable mkldnn or disable when not support AVX2 + // User manually disable onednn or disable when not support AVX2 use_mkldnn_ = false; pass_builder()->DisableMKLDNN(); } @@ -1054,7 +1054,7 @@ void AnalysisConfig::Update() { if (!use_gpu() && !use_xpu() && !use_ipu()) { if (use_mkldnn_ && enable_ir_optim_) { #ifdef PADDLE_WITH_DNNL - // default enable mkldnn when device is cpu and enable_ir_optim + // default enable onednn when device is cpu and enable_ir_optim pass_builder()->EnableMKLDNN(); #endif } diff --git 
a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index a0a61c034d831..d254b7dd046c7 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -80,7 +80,7 @@ #endif #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/inference/api/mkldnn_quantizer.h" +#include "paddle/fluid/inference/api/onednn_quantizer.h" #endif #ifdef PADDLE_WITH_ONNXRUNTIME @@ -408,7 +408,7 @@ bool AnalysisPredictor::Init( root_predictor_id_ = predictor_id_; } - // no matter with or without MKLDNN + // no matter with or without OneDNN paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); // Use Optimized model to inference @@ -619,6 +619,9 @@ void AnalysisPredictor::ClearExtraParams() { config_.shape_range_info_path_); } } + if (op_desc->HasAttr("predictor_id")) { + op_desc->SetAttr("predictor_id", predictor_id_); + } } } @@ -781,10 +784,18 @@ bool AnalysisPredictor::PrepareProgram( executor_->CreateVariables(*inference_program_, 0, true, sub_scope_); // if enable_ir_optim_ is false, - // the analysis pass(op fuse, graph analysis, trt subgraph, mkldnn etc) will + // the analysis pass(op fuse, graph analysis, trt subgraph, onednn etc) will // not be executed. model_precision_ = paddle::inference::GetModelPrecision(*inference_program_); +#ifdef PADDLE_WITH_TENSORRT + if (config_.tensorrt_engine_enabled()) { + inference::tensorrt::TensorRTEngine::predictor_id_per_thread = + predictor_id_; + VLOG(3) << "thread_local var predictor_id in TensorRTEngine is set to: " + << inference::tensorrt::TensorRTEngine::predictor_id_per_thread; + } +#endif if (config_.use_optimized_model_) { LoadParameters(); ClearExtraParams(); @@ -2011,14 +2022,6 @@ void AnalysisPredictor::PrepareArgument() { // NOTE All the members in AnalysisConfig should be copied to Argument. 
void AnalysisPredictor::OptimizeInferenceProgram() { PrepareArgument(); -#ifdef PADDLE_WITH_TENSORRT - if (config_.tensorrt_engine_enabled()) { - inference::tensorrt::TensorRTEngine::predictor_id_per_thread = - predictor_id_; - VLOG(3) << "thread_local var predictor_id in TensorRTEngine is set to: " - << inference::tensorrt::TensorRTEngine::predictor_id_per_thread; - } -#endif Analyzer().Run(argument_.get()); PADDLE_ENFORCE_EQ( argument_->scope_valid(), diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index fe494cab93a90..d44ad5cec1a90 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -321,7 +321,7 @@ class AnalysisPredictor : public PaddlePredictor { void RegisterInputHook(const InputTensorHookFunc &hookfunc) override; /// - /// \brief Initialize mkldnn quantizer and execute mkldnn quantization pass + /// \brief Initialize onednn quantizer and execute onednn quantization pass /// /// \return Whether the function executed successfully /// diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index 1ae582feb4acf..9ae284402f196 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -71,7 +71,7 @@ bool NativePaddlePredictor::Init( platform::EnableProfiler(tracking_device); } - // no matter with or without MKLDNN + // no matter with or without OneDNN paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); if (config_.use_gpu) { diff --git a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt index 833fc98d36dba..5597057c3dc12 100644 --- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt +++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt @@ -155,13 +155,13 @@ if(WITH_MKL) ${MATH_LIB_PATH}/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} 
${MATH_LIB_PATH}/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX}) endif() - set(MKLDNN_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mkldnn") - if(EXISTS ${MKLDNN_PATH}) - include_directories("${MKLDNN_PATH}/include") + set(ONEDNN_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}onednn") + if(EXISTS ${ONEDNN_PATH}) + include_directories("${ONEDNN_PATH}/include") if(WIN32) - set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib) + set(MKLDNN_LIB ${ONEDNN_PATH}/lib/mkldnn.lib) else() - set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libdnnl.so.3) + set(MKLDNN_LIB ${ONEDNN_PATH}/lib/libdnnl.so.3) endif() endif() else() @@ -309,7 +309,7 @@ if(WIN32) ${LIB_PATH} COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/libiomp5md.dll ${LIB_PATH} - COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_PATH}/lib/mkldnn.dll + COMMAND ${CMAKE_COMMAND} -E copy ${ONEDNN_PATH}/lib/mkldnn.dll ${LIB_PATH}) else() add_custom_command( diff --git a/paddle/fluid/inference/api/mkldnn_quantizer.cc b/paddle/fluid/inference/api/onednn_quantizer.cc similarity index 99% rename from paddle/fluid/inference/api/mkldnn_quantizer.cc rename to paddle/fluid/inference/api/onednn_quantizer.cc index 76222b84d4624..aa6f52008ab24 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer.cc +++ b/paddle/fluid/inference/api/onednn_quantizer.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/inference/api/mkldnn_quantizer.h" +#include "paddle/fluid/inference/api/onednn_quantizer.h" #include #include @@ -29,7 +29,7 @@ #include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/inference/analysis/analyzer.h" #include "paddle/fluid/inference/api/analysis_predictor.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #include "paddle/phi/common/place.h" #include "paddle/utils/string/pretty_log.h" diff --git a/paddle/fluid/inference/api/mkldnn_quantizer.h b/paddle/fluid/inference/api/onednn_quantizer.h similarity index 100% rename from paddle/fluid/inference/api/mkldnn_quantizer.h rename to paddle/fluid/inference/api/onednn_quantizer.h diff --git a/paddle/fluid/inference/api/mkldnn_quantizer_config.cc b/paddle/fluid/inference/api/onednn_quantizer_config.cc similarity index 98% rename from paddle/fluid/inference/api/mkldnn_quantizer_config.cc rename to paddle/fluid/inference/api/onednn_quantizer_config.cc index da20870eb0f5c..786d9463766e9 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer_config.cc +++ b/paddle/fluid/inference/api/onednn_quantizer_config.cc @@ -14,7 +14,7 @@ #include -#include "paddle/fluid/inference/api/paddle_mkldnn_quantizer_config.h" +#include "paddle/fluid/inference/api/paddle_onednn_quantizer_config.h" namespace paddle { diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index dcf17dc4399c2..019418f45b625 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -39,7 +39,7 @@ #include "paddle_api.h" // NOLINT #include "paddle_pass_builder.h" // NOLINT #ifdef PADDLE_WITH_DNNL -#include "paddle_mkldnn_quantizer_config.h" // NOLINT +#include "paddle_onednn_quantizer_config.h" // NOLINT #endif namespace paddle { @@ -970,19 +970,19 @@ struct PD_INFER_DECL AnalysisConfig { void SwitchIrDebug(int x = true, const 
std::vector& passes = {}); /// - /// \brief Turn on MKLDNN. + /// \brief Turn on OneDNN. /// /// void EnableMKLDNN(); /// - /// \brief Turn down MKLDNN. + /// \brief Turn down OneDNN. /// /// void DisableMKLDNN(); /// - /// \brief Set the cache capacity of different input shapes for MKLDNN. + /// \brief Set the cache capacity of different input shapes for OneDNN. /// Default value 0 means not caching any shape. /// Please see MKL-DNN Data Caching Design Document: /// https://github.com/PaddlePaddle/FluidDoc/blob/develop/doc/fluid/design/mkldnn/caching/caching.md @@ -991,9 +991,9 @@ struct PD_INFER_DECL AnalysisConfig { /// void SetMkldnnCacheCapacity(int capacity); /// - /// \brief A boolean state telling whether to use the MKLDNN. + /// \brief A boolean state telling whether to use the OneDNN. /// - /// \return bool Whether to use the MKLDNN. + /// \return bool Whether to use the OneDNN. /// bool mkldnn_enabled() const { return use_mkldnn_; } @@ -1021,7 +1021,7 @@ struct PD_INFER_DECL AnalysisConfig { /// NativeConfig ToNativeConfig() const; /// - /// \brief Specify the operator type list to use MKLDNN acceleration. + /// \brief Specify the operator type list to use OneDNN acceleration. /// /// \param op_list The operator type list. /// @@ -1030,47 +1030,47 @@ struct PD_INFER_DECL AnalysisConfig { } /// - /// \brief Turn on MKLDNN quantization. + /// \brief Turn on OneDNN quantization. /// /// void EnableMkldnnQuantizer(); /// - /// \brief Turn on MKLDNN int8. + /// \brief Turn on OneDNN int8. /// /// \param op_list The operator type list. /// void EnableMkldnnInt8(const std::unordered_set& op_list = {}); /// - /// \brief A boolean state telling whether to use the MKLDNN Int8. + /// \brief A boolean state telling whether to use the OneDNN Int8. /// - /// \return bool Whether to use the MKLDNN Int8. + /// \return bool Whether to use the OneDNN Int8. /// bool mkldnn_int8_enabled() const { return use_mkldnn_int8_; } /// - /// \brief Turn on MKLDNN bfloat16. 
+ /// \brief Turn on OneDNN bfloat16. /// /// void EnableMkldnnBfloat16(); /// - /// \brief Turn off MKLDNN fc passes. + /// \brief Turn off OneDNN fc passes. /// void DisableMkldnnFcPasses(); /// - /// \brief A boolean state telling whether to disable the MKLDNN Fc passes. + /// \brief A boolean state telling whether to disable the OneDNN Fc passes. /// - /// \return bool Whether to disable the MKLDNN Fc passes. + /// \return bool Whether to disable the OneDNN Fc passes. /// bool mkldnn_fc_passes_disabled() const { return disable_mkldnn_fc_passes_; } /// - /// \brief A boolean state telling whether to use the MKLDNN Bfloat16. + /// \brief A boolean state telling whether to use the OneDNN Bfloat16. /// - /// \return bool Whether to use the MKLDNN Bfloat16. + /// \return bool Whether to use the OneDNN Bfloat16. /// bool mkldnn_bfloat16_enabled() const { return use_mkldnn_bfloat16_; } @@ -1091,16 +1091,16 @@ struct PD_INFER_DECL AnalysisConfig { bool thread_local_stream_enabled() const { return thread_local_stream_; } /// - /// \brief A boolean state telling whether the MKLDNN quantization is enabled. + /// \brief A boolean state telling whether the OneDNN quantization is enabled. /// - /// \return bool Whether the MKLDNN quantization is enabled. + /// \return bool Whether the OneDNN quantization is enabled. /// bool mkldnn_quantizer_enabled() const { return use_mkldnn_quantizer_; } /// - /// \brief Get MKLDNN quantizer config. + /// \brief Get OneDNN quantizer config. /// - /// \return MkldnnQuantizerConfig* MKLDNN quantizer config. + /// \return MkldnnQuantizerConfig* OneDNN quantizer config. /// MkldnnQuantizerConfig* mkldnn_quantizer_config() const; @@ -1427,7 +1427,7 @@ struct PD_INFER_DECL AnalysisConfig { // NNAdapter related LiteNNAdapterConfig nnadapter_config_; - // mkldnn related. + // onednn related. 
int mkldnn_cache_capacity_{10}; bool use_mkldnn_quantizer_{false}; std::shared_ptr mkldnn_quantizer_config_; diff --git a/paddle/fluid/inference/api/paddle_mkldnn_quantizer_config.h b/paddle/fluid/inference/api/paddle_onednn_quantizer_config.h similarity index 99% rename from paddle/fluid/inference/api/paddle_mkldnn_quantizer_config.h rename to paddle/fluid/inference/api/paddle_onednn_quantizer_config.h index 1208c29c79a9c..c44f7a3e0d049 100644 --- a/paddle/fluid/inference/api/paddle_mkldnn_quantizer_config.h +++ b/paddle/fluid/inference/api/paddle_onednn_quantizer_config.h @@ -53,7 +53,7 @@ enum class ScaleAlgo { /// /// \class MkldnnQuantizerConfig /// -/// \brief Config for mkldnn quantize. +/// \brief Config for onednn quantize. /// /// The MkldnnQuantizerConfig is used to configure Mkldnn's quantization /// parameters, including scale algorithm, warmup data, warmup batch size, diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index f55fab3e71b08..7b399d67d3557 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -358,34 +358,34 @@ void CpuPassStrategy::EnableMKLDNN() { // TODO(Superjomn) Consider the way to mix CPU with GPU. 
#ifdef PADDLE_WITH_DNNL if (!use_mkldnn_) { - passes_.insert(passes_.begin(), "mkldnn_placement_pass"); + passes_.insert(passes_.begin(), "onednn_placement_pass"); for (auto &pass : std::vector({ "squeeze2_transpose2_onednn_fuse_pass", - "depthwise_conv_mkldnn_pass", // + "depthwise_conv_onednn_pass", // "conv_bn_fuse_pass", // Execute BN passes again to "conv_eltwiseadd_bn_fuse_pass", // preserve correct pass order - "conv_affine_channel_mkldnn_fuse_pass", // + "conv_affine_channel_onednn_fuse_pass", // "conv_transpose_bn_fuse_pass", // "conv_transpose_eltwiseadd_bn_fuse_pass", // - "conv_bias_mkldnn_fuse_pass", // - "conv_transpose_bias_mkldnn_fuse_pass", + "conv_bias_onednn_fuse_pass", // + "conv_transpose_bias_onednn_fuse_pass", // TODO(baoachun): Need to support 5-dimensional input. - // "conv3d_bias_mkldnn_fuse_pass", // - "conv_elementwise_add_mkldnn_fuse_pass", - "conv_activation_mkldnn_fuse_pass", // + // "conv3d_bias_onednn_fuse_pass", // + "conv_elementwise_add_onednn_fuse_pass", + "conv_activation_onednn_fuse_pass", // "scale_matmul_fuse_pass", // - "reshape_transpose_matmul_mkldnn_fuse_pass", // - "matmul_transpose_reshape_mkldnn_fuse_pass", // - "matmul_elementwise_add_mkldnn_fuse_pass", // - "matmul_activation_mkldnn_fuse_pass", // + "reshape_transpose_matmul_onednn_fuse_pass", // + "matmul_transpose_reshape_onednn_fuse_pass", // + "matmul_elementwise_add_onednn_fuse_pass", // + "matmul_activation_onednn_fuse_pass", // // Disabled due to topology-dependent speed-up - "fc_mkldnn_pass", - "fc_act_mkldnn_fuse_pass", + "fc_onednn_pass", + "fc_act_onednn_fuse_pass", "self_attention_fuse_pass", // "batch_norm_act_fuse_pass", // "softplus_activation_onednn_fuse_pass", // - "shuffle_channel_mkldnn_detect_pass", // + "shuffle_channel_onednn_detect_pass", // "elementwise_act_onednn_fuse_pass", // "operator_scale_onednn_fuse_pass", // "operator_unsqueeze2_onednn_fuse_pass", // @@ -419,8 +419,8 @@ void CpuPassStrategy::EnableMkldnnQuantizer() { void 
CpuPassStrategy::EnableMkldnnBfloat16() { #ifdef PADDLE_WITH_DNNL if (!use_mkldnn_bfloat16_) { - passes_.emplace_back("fc_mkldnn_pass"); - passes_.emplace_back("fc_act_mkldnn_fuse_pass"); + passes_.emplace_back("fc_onednn_pass"); + passes_.emplace_back("fc_act_onednn_fuse_pass"); passes_.emplace_back("cpu_bfloat16_placement_pass"); passes_.emplace_back("cpu_bfloat16_pass"); @@ -437,8 +437,8 @@ void CpuPassStrategy::EnableMkldnnInt8() { if (!use_mkldnn_int8_) { passes_.clear(); passes_.emplace_back("simplify_with_basic_ops_pass"); - passes_.emplace_back("quant_dequant_mkldnn_pass"); - passes_.emplace_back("mkldnn_placement_pass"); + passes_.emplace_back("quant_dequant_onednn_pass"); + passes_.emplace_back("onednn_placement_pass"); passes_.emplace_back("constant_folding_pass"); passes_.emplace_back("squeeze2_transpose2_onednn_fuse_pass"); passes_.emplace_back("layer_norm_fuse_pass"); @@ -462,27 +462,27 @@ void CpuPassStrategy::EnableMkldnnInt8() { passes_.emplace_back("matmul_scale_fuse_pass"); passes_.emplace_back("gpu_cpu_map_matmul_to_mul_pass"); passes_.emplace_back("repeated_fc_relu_fuse_pass"); - passes_.emplace_back("depthwise_conv_mkldnn_pass"); + passes_.emplace_back("depthwise_conv_onednn_pass"); passes_.emplace_back("conv_bn_fuse_pass"); passes_.emplace_back("conv_eltwiseadd_bn_fuse_pass"); - passes_.emplace_back("conv_affine_channel_mkldnn_fuse_pass"); + passes_.emplace_back("conv_affine_channel_onednn_fuse_pass"); passes_.emplace_back("conv_transpose_bn_fuse_pass"); passes_.emplace_back("conv_transpose_eltwiseadd_bn_fuse_pass"); - passes_.emplace_back("conv_bias_mkldnn_fuse_pass"); - passes_.emplace_back("conv_transpose_bias_mkldnn_fuse_pass"); - passes_.emplace_back("conv_elementwise_add_mkldnn_fuse_pass"); - passes_.emplace_back("conv_activation_mkldnn_fuse_pass"); + passes_.emplace_back("conv_bias_onednn_fuse_pass"); + passes_.emplace_back("conv_transpose_bias_onednn_fuse_pass"); + passes_.emplace_back("conv_elementwise_add_onednn_fuse_pass"); + 
passes_.emplace_back("conv_activation_onednn_fuse_pass"); passes_.emplace_back("fc_fuse_pass"); passes_.emplace_back("repeated_fc_relu_fuse_pass"); - passes_.emplace_back("fc_mkldnn_pass"); - passes_.emplace_back("fc_act_mkldnn_fuse_pass"); - passes_.emplace_back("matmul_transpose_reshape_mkldnn_fuse_pass"); + passes_.emplace_back("fc_onednn_pass"); + passes_.emplace_back("fc_act_onednn_fuse_pass"); + passes_.emplace_back("matmul_transpose_reshape_onednn_fuse_pass"); passes_.emplace_back("batch_norm_act_fuse_pass"); passes_.emplace_back("softplus_activation_onednn_fuse_pass"); - passes_.emplace_back("compute_propagate_scales_mkldnn_pass"); + passes_.emplace_back("compute_propagate_scales_onednn_pass"); passes_.emplace_back("scale_matmul_fuse_pass"); - passes_.emplace_back("reshape_transpose_matmul_mkldnn_fuse_pass"); - passes_.emplace_back("matmul_elementwise_add_mkldnn_fuse_pass"); + passes_.emplace_back("reshape_transpose_matmul_onednn_fuse_pass"); + passes_.emplace_back("matmul_elementwise_add_onednn_fuse_pass"); passes_.emplace_back("operator_scale_onednn_fuse_pass"); passes_.emplace_back("operator_unsqueeze2_onednn_fuse_pass"); passes_.emplace_back("operator_reshape2_onednn_fuse_pass"); @@ -510,7 +510,7 @@ void CpuPassStrategy::DisableMkldnnFcPasses() { void CpuPassStrategy::EraseFcMkldnnPasses() { std::vector fc_passes_to_erase( - {"fc_mkldnn_pass", "fc_act_mkldnn_fuse_pass"}); + {"fc_onednn_pass", "fc_act_onednn_fuse_pass"}); for (const auto &pass : fc_passes_to_erase) { int idx = static_cast(GetPassIndex(pass)); if (idx != -1) { @@ -538,6 +538,7 @@ XpuPassStrategy::XpuPassStrategy() : PassStrategy({}) { "cast_embedding_trans_ids_to_int32_pass", "delete_elementwise_mul_op_pass", "generate_sequence_xpu_fuse_pass", + "group_norm_silu_xpu_fuse_pass", "embedding_with_eltwise_add_xpu_fuse_pass", "qk_qkv_attention_xpu_fuse_pass", "multi_encoder_xpu_fuse_pass", @@ -545,6 +546,7 @@ XpuPassStrategy::XpuPassStrategy() : PassStrategy({}) { 
"multi_encoder_xpu_slice_fuse_pass", "fused_multi_transformer_cachekv_layout_trans_pass", "fused_multi_transformer_int8_cachekv_layout_trans_pass", + "cross_attention_xpu_fuse_pass", "decoder_attention_xpu_fuse_pass", "one_beam_size_fuse_pass", "fold_interp_outsize_fuse_pass", @@ -586,6 +588,7 @@ XpuPassStrategy::XpuPassStrategy() : PassStrategy({}) { "xpu_quantize_op_pass", "xpu_quantize_squash_pass", "link_xpu_op_max_pass", + "spatial_transformer_resblock_xpu_fuse_pass", "delete_isolated_node_pass", "inplace_op_var_pass", }); @@ -617,17 +620,24 @@ const std::vector kPirXpuPasses{// Functional pass "map_op_to_another_pass", "identity_op_clean_pass", // Operator fusion pass - "add_layernorm_xpu_fuse_pass"}; + "add_layernorm_xpu_fuse_pass", + "group_norm_silu_xpu_fuse_pass"}; const std::vector kPirMkldnnPasses{ + "depthwise_conv_onednn_pass", + "squeeze_transpose_onednn_fuse_pass", "conv2d_bias_fuse_pass", "conv2d_transpose_bias_fuse_pass", "conv3d_bias_fuse_pass", "batch_norm_act_fuse_pass", + "scale_matmul_fuse_pass", "reshape_transpose_matmul_fuse_pass", + "matmul_transpose_reshape_fuse_pass", "matmul_elementwise_add_fuse_pass", "matmul_activation_fuse_pass", - "conv_elementwise_add_mkldnn_fuse_pass"}; + "conv_elementwise_add_onednn_fuse_pass", + "conv_activation_onednn_fuse_pass", + "conv_concat_activation_onednn_fuse_pass"}; const std::vector kPirCpuPasses{}; diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h index 79ef68c853cfb..013fb8d477924 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.h +++ b/paddle/fluid/inference/api/paddle_pass_builder.h @@ -43,7 +43,7 @@ namespace paddle { /// Example Usage: /// Build a new pass. 
/// \code{cpp} -/// const vector passes(1, "conv_relu_mkldnn_fuse_pass"); +/// const vector passes(1, "conv_relu_onednn_fuse_pass"); /// PaddlePassBuilder builder(passes); /// \endcode class PD_INFER_DECL PaddlePassBuilder { @@ -139,24 +139,24 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder { /// \brief Enable the use of cuDNN kernel. virtual void EnableCUDNN() {} - /// \brief Enable the use of MKLDNN. - /// The MKLDNN control exists in both CPU and GPU mode, because there can + /// \brief Enable the use of OneDNN. + /// The OneDNN control exists in both CPU and GPU mode, because there can /// still be some CPU kernels running in GPU mode. virtual void EnableMKLDNN() {} - /// \brief Disable the use of MKLDNN. + /// \brief Disable the use of OneDNN. virtual void DisableMKLDNN() {} - /// \brief Enable MKLDNN quantize optimization. + /// \brief Enable OneDNN quantize optimization. virtual void EnableMkldnnQuantizer() {} - /// \brief Enable MKLDNN bfloat16. + /// \brief Enable OneDNN bfloat16. virtual void EnableMkldnnBfloat16() {} - /// \brief Enable MKLDNN int8. + /// \brief Enable OneDNN int8. virtual void EnableMkldnnInt8() {} - /// \brief Disable MKLDNN fc passes. + /// \brief Disable OneDNN fc passes. virtual void DisableMkldnnFcPasses() {} /// \brief Check if we are using gpu. @@ -214,26 +214,26 @@ class PD_INFER_DECL CpuPassStrategy : public PassStrategy { /// \brief Enable the use of cuDNN kernel. void EnableCUDNN() override; - /// \brief Enable the use of MKLDNN. + /// \brief Enable the use of OneDNN. void EnableMKLDNN() override; - /// \brief Disable the use of MKLDNN. + /// \brief Disable the use of OneDNN. void DisableMKLDNN() override; - /// \brief Enable MKLDNN quantize optimization. + /// \brief Enable OneDNN quantize optimization. void EnableMkldnnQuantizer() override; - /// \brief Enable MKLDNN bfloat16. + /// \brief Enable OneDNN bfloat16. void EnableMkldnnBfloat16() override; - /// \brief Enable MKLDNN int8. 
+ /// \brief Enable OneDNN int8. void EnableMkldnnInt8() override; - /// \brief Disable MKLDNN fc passes. + /// \brief Disable OneDNN fc passes. void DisableMkldnnFcPasses() override; protected: - /// \brief Erase MKLDNN fc passes. + /// \brief Erase OneDNN fc passes. void EraseFcMkldnnPasses(); /// \cond Protected @@ -276,7 +276,7 @@ class PD_INFER_DECL GpuPassStrategy : public PassStrategy { /// \brief Not supported in GPU mode yet. void EnableMkldnnInt8() override; - /// \brief Disable MKLDNN fc passes. + /// \brief Disable OneDNN fc passes. void DisableMkldnnFcPasses() override; /// \brief Default destructor. diff --git a/paddle/fluid/inference/capi_exp/pd_config.h b/paddle/fluid/inference/capi_exp/pd_config.h index 427e9b95ac499..f1bfe828cbcf2 100644 --- a/paddle/fluid/inference/capi_exp/pd_config.h +++ b/paddle/fluid/inference/capi_exp/pd_config.h @@ -526,14 +526,14 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigLiteEngineEnabled( PADDLE_CAPI_EXPORT extern void PD_ConfigSwitchIrDebug( __pd_keep PD_Config* pd_config, PD_Bool x); /// -/// \brief Turn on MKLDNN. +/// \brief Turn on OneDNN. /// /// \param[in] pd_config config /// PADDLE_CAPI_EXPORT extern void PD_ConfigEnableMKLDNN( __pd_keep PD_Config* pd_config); /// -/// \brief Set the cache capacity of different input shapes for MKLDNN. +/// \brief Set the cache capacity of different input shapes for OneDNN. /// Default value 0 means not caching any shape. /// Please see MKL-DNN Data Caching Design Document: /// https://github.com/PaddlePaddle/FluidDoc/blob/develop/doc/fluid/design/mkldnn/caching/caching.md @@ -544,10 +544,10 @@ PADDLE_CAPI_EXPORT extern void PD_ConfigEnableMKLDNN( PADDLE_CAPI_EXPORT extern void PD_ConfigSetMkldnnCacheCapacity( __pd_keep PD_Config* pd_config, int32_t capacity); /// -/// \brief A boolean state telling whether to use the MKLDNN. +/// \brief A boolean state telling whether to use the OneDNN. /// /// \param[in] pd_config config -/// \return Whether to use the MKLDNN. 
+/// \return Whether to use the OneDNN. /// PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMkldnnEnabled( __pd_keep PD_Config* pd_config); @@ -570,7 +570,7 @@ PADDLE_CAPI_EXPORT extern void PD_ConfigSetCpuMathLibraryNumThreads( PADDLE_CAPI_EXPORT extern int32_t PD_ConfigGetCpuMathLibraryNumThreads( __pd_keep PD_Config* pd_config); /// -/// \brief Specify the operator type list to use MKLDNN acceleration. +/// \brief Specify the operator type list to use OneDNN acceleration. /// /// \param[in] pd_config config /// \param[in] ops_num The number of operator type list. @@ -579,32 +579,32 @@ PADDLE_CAPI_EXPORT extern int32_t PD_ConfigGetCpuMathLibraryNumThreads( PADDLE_CAPI_EXPORT extern void PD_ConfigSetMkldnnOp( __pd_keep PD_Config* pd_config, size_t ops_num, const char** op_list); /// -/// \brief Turn on MKLDNN quantization. +/// \brief Turn on OneDNN quantization. /// /// \param[in] pd_config config /// PADDLE_CAPI_EXPORT extern void PD_ConfigEnableMkldnnQuantizer( __pd_keep PD_Config* pd_config); /// -/// \brief A boolean state telling whether the MKLDNN quantization is enabled. +/// \brief A boolean state telling whether the OneDNN quantization is enabled. /// /// \param[in] pd_config config -/// \return Whether the MKLDNN quantization is enabled. +/// \return Whether the OneDNN quantization is enabled. /// PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMkldnnQuantizerEnabled( __pd_keep PD_Config* pd_config); /// -/// \brief Turn on MKLDNN bfloat16. +/// \brief Turn on OneDNN bfloat16. /// /// \param[in] pd_config config /// PADDLE_CAPI_EXPORT extern void PD_ConfigEnableMkldnnBfloat16( __pd_keep PD_Config* pd_config); /// -/// \brief A boolean state telling whether to use the MKLDNN Bfloat16. +/// \brief A boolean state telling whether to use the OneDNN Bfloat16. /// /// \param[in] pd_config config -/// \return Whether to use the MKLDNN Bfloat16. +/// \return Whether to use the OneDNN Bfloat16. 
/// PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMkldnnBfloat16Enabled( __pd_keep PD_Config* pd_config); @@ -617,17 +617,17 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMkldnnBfloat16Enabled( PADDLE_CAPI_EXPORT extern void PD_ConfigSetBfloat16Op( __pd_keep PD_Config* pd_config, size_t ops_num, const char** op_list); /// -/// \brief Turn on MKLDNN int8. +/// \brief Turn on OneDNN int8. /// /// \param[in] pd_config config /// PADDLE_CAPI_EXPORT extern void PD_ConfigEnableMkldnnInt8( __pd_keep PD_Config* pd_config); /// -/// \brief A boolean state telling whether to use the MKLDNN int8. +/// \brief A boolean state telling whether to use the OneDNN int8. /// /// \param[in] pd_config config -/// \return Whether to use the MKLDNN int8. +/// \return Whether to use the OneDNN int8. /// PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMkldnnInt8Enabled( __pd_keep PD_Config* pd_config); diff --git a/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Config.cpp b/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Config.cpp index 0d585f938be8c..b4cf4a0953169 100644 --- a/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Config.cpp +++ b/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Config.cpp @@ -122,7 +122,7 @@ Java_com_baidu_paddle_inference_Config_cpuMathLibraryNumThreads( return mathThreadsNum; } -// 5. MKLDNN settings +// 5. 
OneDNN settings JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_enableMKLDNN( JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { diff --git a/paddle/fluid/inference/experimental/javaapi/src/main/java/com/baidu/paddle/inference/Config.java b/paddle/fluid/inference/experimental/javaapi/src/main/java/com/baidu/paddle/inference/Config.java index a312cc73fde22..e9bef0d271f05 100644 --- a/paddle/fluid/inference/experimental/javaapi/src/main/java/com/baidu/paddle/inference/Config.java +++ b/paddle/fluid/inference/experimental/javaapi/src/main/java/com/baidu/paddle/inference/Config.java @@ -208,7 +208,7 @@ public void resetCppPaddleConfigPointer() { private native int cpuMathLibraryNumThreads(long cppPaddleConfigPointer); - // 5. MKLDNN settings + // 5. OneDNN settings private native void enableMKLDNN(long cppPaddleConfigPointer); diff --git a/paddle/fluid/inference/goapi/config.go b/paddle/fluid/inference/goapi/config.go index 9d0a1e5864418..c2e2b410e4061 100644 --- a/paddle/fluid/inference/goapi/config.go +++ b/paddle/fluid/inference/goapi/config.go @@ -554,14 +554,14 @@ func (config *Config) SwitchIrDebug(x bool) { } /// -/// \brief Turn on MKLDNN. +/// \brief Turn on OneDNN. /// func (config *Config) EnableMKLDNN() { C.PD_ConfigEnableMKLDNN(config.c) } /// -/// \brief Set the cache capacity of different input shapes for MKLDNN. +/// \brief Set the cache capacity of different input shapes for OneDNN. /// Default value 0 means not caching any shape. /// Please see MKL-DNN Data Caching Design Document: /// https://github.com/PaddlePaddle/FluidDoc/blob/develop/doc/fluid/design/mkldnn/caching/caching.md @@ -573,9 +573,9 @@ func (config *Config) SetMkldnnCacheCapacity(capacity int32) { } /// -/// \brief A boolean state telling whether to use the MKLDNN. +/// \brief A boolean state telling whether to use the OneDNN. /// -/// \return bool Whether to use the MKLDNN. +/// \return bool Whether to use the OneDNN. 
/// func (config *Config) MkldnnEnabled() bool { return cvtPDBoolToGo(C.PD_ConfigMkldnnEnabled(config.c)) @@ -609,7 +609,7 @@ func (config *Config) CpuMathLibraryNumThreads() int32 { // NativeConfig ToNativeConfig() const; /// -/// \brief Specify the operator type list to use MKLDNN acceleration. +/// \brief Specify the operator type list to use OneDNN acceleration. /// /// \param opList The operator type list. /// @@ -627,23 +627,23 @@ func (config *Config) SetMKLDNNOp(opList []string) { } /// -/// \brief Turn on MKLDNN quantization. +/// \brief Turn on OneDNN quantization. /// func (config *Config) EnableMkldnnQuantizer() { C.PD_ConfigEnableMkldnnQuantizer(config.c) } /// -/// \brief Turn on MKLDNN bfloat16. +/// \brief Turn on OneDNN bfloat16. /// func (config *Config) EnableMkldnnBfloat16() { C.PD_ConfigEnableMkldnnBfloat16(config.c) } /// -/// \brief A boolean state telling whether to use the MKLDNN Bfloat16. +/// \brief A boolean state telling whether to use the OneDNN Bfloat16. /// -/// \return bool Whether to use the MKLDNN Bfloat16. +/// \return bool Whether to use the OneDNN Bfloat16. /// func (config *Config) MkldnnBfloat16Enabled() bool { return cvtPDBoolToGo(C.PD_ConfigMkldnnBfloat16Enabled(config.c)) @@ -677,9 +677,9 @@ func (config *Config) ThreadLocalStreamEnabled() bool { } /// -/// \brief A boolean state telling whether the MKLDNN quantization is enabled. +/// \brief A boolean state telling whether the OneDNN quantization is enabled. /// -/// \return bool Whether the MKLDNN quantization is enabled. +/// \return bool Whether the OneDNN quantization is enabled. 
/// func (config *Config) MkldnnQuantizerEnabled() bool { return cvtPDBoolToGo(C.PD_ConfigMkldnnQuantizerEnabled(config.c)) diff --git a/paddle/fluid/ir_adaptor/translator/op_translator.cc b/paddle/fluid/ir_adaptor/translator/op_translator.cc index f41a25fe9717c..d3d9174e84c48 100644 --- a/paddle/fluid/ir_adaptor/translator/op_translator.cc +++ b/paddle/fluid/ir_adaptor/translator/op_translator.cc @@ -2023,6 +2023,19 @@ struct FillConstant2FullWithTensorTranscriber : public OpTranscriber { const OpInputInfoList& input_infos, pir::Block* block) override { std::vector op_inputs; + if (op_desc.HasInput("ValueTensor", true) && + op_desc.Input("ValueTensor", true).size() > 0) { + auto value_tensor_vars = op_desc.Input("ValueTensor", true); + auto defining_info = (*param_map)[value_tensor_vars[0]]; + op_inputs.push_back(defining_info.value); + } else { + float value = PADDLE_GET_CONST(float, op_desc.GetAttr("value")); + pir::Attribute new_attr = pir::FloatAttribute::get(ctx, value); + auto defining_op = + InsertFullOperationForAttributeInput(ctx, block, new_attr); + op_inputs.push_back(defining_op->result(0)); + } + if (op_desc.HasInput("ShapeTensor", true) && op_desc.Input("ShapeTensor", true).size() > 0) { auto shape_tensor_vars = op_desc.Input("ShapeTensor", true); @@ -2044,18 +2057,6 @@ struct FillConstant2FullWithTensorTranscriber : public OpTranscriber { op_inputs.push_back(defining_op->result(0)); } - if (op_desc.HasInput("ValueTensor", true) && - op_desc.Input("ValueTensor", true).size() > 0) { - auto value_tensor_vars = op_desc.Input("ValueTensor", true); - auto defining_info = (*param_map)[value_tensor_vars[0]]; - op_inputs.push_back(defining_info.value); - } else { - float value = PADDLE_GET_CONST(float, op_desc.GetAttr("value")); - pir::Attribute new_attr = pir::FloatAttribute::get(ctx, value); - auto defining_op = - InsertFullOperationForAttributeInput(ctx, block, new_attr); - op_inputs.push_back(defining_op->result(0)); - } return op_inputs; } diff --git 
a/paddle/fluid/ir_adaptor/translator/utils.cc b/paddle/fluid/ir_adaptor/translator/utils.cc index 07bbb644c6b72..4015d358930b4 100644 --- a/paddle/fluid/ir_adaptor/translator/utils.cc +++ b/paddle/fluid/ir_adaptor/translator/utils.cc @@ -95,7 +95,7 @@ std::vector CheckUnregisteredOperationInBlock( OpTranscriber general_handler; try { general_handler.LookUpOpInfo(ctx, *op); - } catch (pir::IrNotMetException& e) { + } catch (common::enforce::EnforceNotMet& e) { unregistered_ops.push_back(op->Type()); } } diff --git a/paddle/fluid/jit/engine/interpreter_engine.cc b/paddle/fluid/jit/engine/interpreter_engine.cc index e8f622641c33b..7d575ff838f4f 100644 --- a/paddle/fluid/jit/engine/interpreter_engine.cc +++ b/paddle/fluid/jit/engine/interpreter_engine.cc @@ -52,11 +52,11 @@ void InterpreterEngine::CreateInterpreterCore() { framework::ir::PassRegistry::Instance().Get("delete_dropout_op_x_pass"); pass->Apply(&graph); #ifdef PADDLE_WITH_DNNL - auto mkldnn_pass = - framework::ir::PassRegistry::Instance().Get("mkldnn_placement_pass"); - mkldnn_pass->Set("mkldnn_enabled_op_types", + auto onednn_pass = + framework::ir::PassRegistry::Instance().Get("onednn_placement_pass"); + onednn_pass->Set("mkldnn_enabled_op_types", new std::unordered_set({})); - mkldnn_pass->Apply(&graph); + onednn_pass->Apply(&graph); #endif GraphToProgram(graph, &converted_prog_, nullptr); diff --git a/paddle/fluid/memory/CMakeLists.txt b/paddle/fluid/memory/CMakeLists.txt index 18bd48d0cd2e1..2fd782875856e 100644 --- a/paddle/fluid/memory/CMakeLists.txt +++ b/paddle/fluid/memory/CMakeLists.txt @@ -2,12 +2,12 @@ add_subdirectory(allocation) file(GLOB fluid_memory_srcs "*.cc") -if(WITH_MKLDNN) - set(MKLDNN_CTX_DEPS mkldnn) +if(WITH_ONEDNN) + set(ONEDNN_CTX_DEPS onednn) else() - set(MKLDNN_CTX_DEPS) + set(ONEDNN_CTX_DEPS) endif() -set(fluid_memory_deps place enforce common allocator ${MKLDNN_CTX_DEPS}) +set(fluid_memory_deps place enforce common allocator ${ONEDNN_CTX_DEPS}) cc_library( fluid_memory diff 
--git a/paddle/fluid/memory/allocation/stream_safe_custom_device_allocator.cc b/paddle/fluid/memory/allocation/stream_safe_custom_device_allocator.cc index 218068aeb9c97..bbc0915fe10ce 100644 --- a/paddle/fluid/memory/allocation/stream_safe_custom_device_allocator.cc +++ b/paddle/fluid/memory/allocation/stream_safe_custom_device_allocator.cc @@ -50,35 +50,11 @@ void StreamSafeCustomDeviceAllocation::RecordStream( outstanding_event_map_[stream]->Init(place()); VLOG(9) << "Create a new event " << outstanding_event_map_[stream]->raw_event(); - auto stream_wrapper = phi::stream::Stream(place(), stream); - VLOG(8) << "Record event " << outstanding_event_map_[stream]->raw_event() - << " to stream " << stream; - outstanding_event_map_[stream]->Record(&stream_wrapper); - } -} - -void StreamSafeCustomDeviceAllocation::MarkAsWillBeFreed() { - std::lock_guard lock_guard(outstanding_event_map_lock_); - if (!will_be_freed_) { - will_be_freed_ = false; - VLOG(8) << "ptr: " << ptr() << " will be freed"; - if (phi::DeviceManager::HasDeviceType(place_.GetDeviceType()) && - outstanding_event_map_.find(owning_stream_) == - outstanding_event_map_.end()) { - std::call_once(once_flag_, - [this] { phi::DeviceManager::SetDevice(place_); }); - outstanding_event_map_.insert( - {owning_stream_, std::make_shared()}); - outstanding_event_map_[owning_stream_]->Init(place_); - VLOG(9) << "Create a new event " - << outstanding_event_map_[owning_stream_]->raw_event(); - auto stream_wrapper = phi::stream::Stream(place_, owning_stream_); - VLOG(8) << "Record event " - << outstanding_event_map_[owning_stream_]->raw_event() - << " to stream " << owning_stream_; - outstanding_event_map_[owning_stream_]->Record(&stream_wrapper); - } } + auto stream_wrapper = phi::stream::Stream(place(), stream); + VLOG(8) << "Record event " << outstanding_event_map_[stream]->raw_event() + << " to stream " << stream; + outstanding_event_map_[stream]->Record(&stream_wrapper); } bool 
StreamSafeCustomDeviceAllocation::CanBeFreed() { @@ -190,7 +166,6 @@ void StreamSafeCustomDeviceAllocator::FreeImpl(phi::Allocation* allocation) { phi::DeviceContextPool::Instance().Get(place_)) ->stream()); } - stream_safe_cuda_allocation->MarkAsWillBeFreed(); if (stream_safe_cuda_allocation->CanBeFreed()) { VLOG(9) << "Directly delete allocation"; delete stream_safe_cuda_allocation; diff --git a/paddle/fluid/memory/allocation/stream_safe_custom_device_allocator.h b/paddle/fluid/memory/allocation/stream_safe_custom_device_allocator.h index 75f25fc0cfc2a..6d6bea9b2535c 100644 --- a/paddle/fluid/memory/allocation/stream_safe_custom_device_allocator.h +++ b/paddle/fluid/memory/allocation/stream_safe_custom_device_allocator.h @@ -37,7 +37,6 @@ class StreamSafeCustomDeviceAllocation : public Allocation { void RecordStream(phi::stream::stream_t stream); bool CanBeFreed(); - void MarkAsWillBeFreed(); phi::stream::stream_t GetOwningStream() const; void SetOwningStream(phi::stream::stream_t s); diff --git a/paddle/fluid/operators/activation_op.cu.h b/paddle/fluid/operators/activation_op.cu.h index d9b1545abce4c..37fd511d7de17 100644 --- a/paddle/fluid/operators/activation_op.cu.h +++ b/paddle/fluid/operators/activation_op.cu.h @@ -13,9 +13,9 @@ limitations under the License. */ #include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h" -#include "paddle/fluid/platform/bfloat16.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" #include "paddle/phi/common/amp_type_traits.h" +#include "paddle/phi/common/bfloat16.h" #include "paddle/phi/kernels/funcs/activation_functor.h" namespace paddle { diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h index 38432f8768f59..399ea6963dd0b 100644 --- a/paddle/fluid/operators/activation_op.h +++ b/paddle/fluid/operators/activation_op.h @@ -30,7 +30,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/activation_functor.h" @@ -53,13 +53,13 @@ inline void ExtractActivationTensor(const framework::ExecutionContext& context, phi::DenseTensor** Out) { auto x_var = context.InputVar("X"); auto out_var = context.OutputVar("Out"); - PADDLE_ENFORCE_NOT_NULL(x_var, - platform::errors::NotFound( - "Cannot get input Variable X, variable name = %s", - context.InputName("X"))); + PADDLE_ENFORCE_NOT_NULL( + x_var, + phi::errors::NotFound("Cannot get input Variable X, variable name = %s", + context.InputName("X"))); PADDLE_ENFORCE_NOT_NULL( out_var, - platform::errors::NotFound( + phi::errors::NotFound( "Cannot get output Variable Out, variable name = %s", context.OutputName("Out"))); if (CanBeUsedBySelectedRows.count(context.Type())) { @@ -73,9 +73,9 @@ inline void ExtractActivationTensor(const framework::ExecutionContext& context, PADDLE_ENFORCE_NOT_NULL( *Out, - platform::errors::NotFound("Cannot get the tensor from the Variable " - "Output(Out), variable name = %s", - context.OutputName("Out"))); + phi::errors::NotFound("Cannot get the tensor from the Variable " + "Output(Out), variable name = %s", + context.OutputName("Out"))); } template @@ -94,23 +94,21 @@ inline void ExtractActivationGradTensor( out_var = context.InputVar("Out"); PADDLE_ENFORCE_NOT_NULL( out_var, - platform::errors::NotFound( + phi::errors::NotFound( "Cannot get input Variable Out, variable name = %s", context.InputName("Out"))); } PADDLE_ENFORCE_NOT_NULL( out_grad_var, - platform::errors::NotFound( - "Cannot get input Variable %s, variable name = %s", - framework::GradVarName("Out"), - context.InputName(framework::GradVarName("Out")))); + phi::errors::NotFound("Cannot get input Variable %s, variable 
name = %s", + framework::GradVarName("Out"), + context.InputName(framework::GradVarName("Out")))); PADDLE_ENFORCE_NOT_NULL( x_grad_var, - platform::errors::NotFound( - "Cannot get output Variable %s, variable name = %s", - framework::GradVarName("X"), - context.OutputName(framework::GradVarName("X")))); + phi::errors::NotFound("Cannot get output Variable %s, variable name = %s", + framework::GradVarName("X"), + context.OutputName(framework::GradVarName("X")))); if (CanBeUsedBySelectedRows.count(context.Type())) { *dOut = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar( @@ -137,19 +135,19 @@ inline void ExtractActivationGradTensor( } } - PADDLE_ENFORCE_NOT_NULL(*dX, - platform::errors::NotFound( - "Cannot get the tensor from the Variable " - "Output(Out), variable name = %s", - context.OutputName(framework::GradVarName("X")))); + PADDLE_ENFORCE_NOT_NULL( + *dX, + phi::errors::NotFound("Cannot get the tensor from the Variable " + "Output(Out), variable name = %s", + context.OutputName(framework::GradVarName("X")))); if (static_cast(kDepValue) & static_cast(ActBwdOpFwdDeps::kDepX)) { auto x_var = context.InputVar("X"); PADDLE_ENFORCE_NOT_NULL( x_var, - platform::errors::NotFound("Cannot get the tensor from the " - "Variable Input(X), variable name = %s", - context.InputName("X"))); + phi::errors::NotFound("Cannot get the tensor from the " + "Variable Input(X), variable name = %s", + context.InputName("X"))); if (CanBeUsedBySelectedRows.count(context.Type())) { *X = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_var); } else { @@ -384,16 +382,15 @@ inline void ExtractDoubleGradTensorWithInputDOut( auto ddo_var = ctx.OutputVar("DDOut"); PADDLE_ENFORCE_NOT_NULL( ddx_var, - platform::errors::NotFound( - "Cannot get input Variable Out, variable name = %s", - ctx.InputName("DDX"))); + phi::errors::NotFound("Cannot get input Variable Out, variable name = %s", + ctx.InputName("DDX"))); *ddX = ctx.Input("DDX"); if (ddo_var) { *ddOut = 
ctx.Output("DDOut"); } PADDLE_ENFORCE_NOT_NULL( ddX, - platform::errors::NotFound( + phi::errors::NotFound( "Cannot get the tensor from the Variable DDX, variable name = %s", ctx.OutputName("DDX"))); @@ -401,9 +398,8 @@ inline void ExtractDoubleGradTensorWithInputDOut( auto x_var = ctx.InputVar("X"); PADDLE_ENFORCE_NOT_NULL( x_var, - platform::errors::NotFound( - "Cannot get input Variable Out, variable name = %s", - ctx.InputName("X"))); + phi::errors::NotFound("Cannot get input Variable Out, variable name = %s", + ctx.InputName("X"))); auto dx_var = ctx.OutputVar("DX"); *X = ctx.Input("X"); if (dx_var) { diff --git a/paddle/fluid/operators/add_position_encoding_op.cc b/paddle/fluid/operators/add_position_encoding_op.cc index ad24d37b90d81..13d6f7449f6dd 100644 --- a/paddle/fluid/operators/add_position_encoding_op.cc +++ b/paddle/fluid/operators/add_position_encoding_op.cc @@ -72,7 +72,7 @@ class AddPositionEncodingOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_GE( alpha, 0.0f, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Attribute 'alpha' must be greater than or equal to 0.0.")); }); AddAttr("beta", "The scale of Position Embedding.") @@ -81,7 +81,7 @@ class AddPositionEncodingOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_GE( beta, 0.0f, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Attribute 'beta' must be greater than or equal to 0.0.")); }); AddComment(R"DOC( diff --git a/paddle/fluid/operators/add_position_encoding_op.h b/paddle/fluid/operators/add_position_encoding_op.h index 4547f6321a01d..009e40efeae38 100644 --- a/paddle/fluid/operators/add_position_encoding_op.h +++ b/paddle/fluid/operators/add_position_encoding_op.h @@ -41,7 +41,7 @@ class AddPositionEncodingKernel : public framework::OpKernel { if (x_lod.empty()) { PADDLE_ENFORCE_EQ(x_dim.size(), 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input(X)'s dimension of 
AddPositionEncodingOp " "should be equal to " "3, but received %d. ", @@ -52,14 +52,14 @@ class AddPositionEncodingKernel : public framework::OpKernel { } else { PADDLE_ENFORCE_EQ(x_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input(X)'s dimension of AddPositionEncodingOp " "should be equal to " "2, but received %d. ", x_dim.size())); PADDLE_ENFORCE_EQ(x_lod.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input(X)'s lod level of AddPositionEncodingOp " "should be equal to " "1, but received %d. ", @@ -70,13 +70,13 @@ class AddPositionEncodingKernel : public framework::OpKernel { enc_size = x_dim[1]; } - PADDLE_ENFORCE_EQ(enc_size % 2, - 0, - platform::errors::InvalidArgument( - "The input(X)'s feature size of " - "AddPositionEncodingOp only support even, " - "but received an odd number: %d. ", - enc_size)); + PADDLE_ENFORCE_EQ( + enc_size % 2, + 0, + phi::errors::InvalidArgument("The input(X)'s feature size of " + "AddPositionEncodingOp only support even, " + "but received an odd number: %d. 
", + enc_size)); const int half_size = enc_size / 2; for (int i = 0; i < batch_size; ++i) { diff --git a/paddle/fluid/operators/affine_channel_op.cc b/paddle/fluid/operators/affine_channel_op.cc index f44c181cca097..b80672216efe3 100644 --- a/paddle/fluid/operators/affine_channel_op.cc +++ b/paddle/fluid/operators/affine_channel_op.cc @@ -80,13 +80,13 @@ class AffineChannelOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( scale_dims.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(Scale) must be 1," "But received the dimensions of Input(Scale) is [%d] ", scale_dims.size())); PADDLE_ENFORCE_EQ(b_dims.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(Bias) must be 1," "But received the dimensions of Input(Bias) is [%d] ", scale_dims.size())); @@ -94,7 +94,7 @@ class AffineChannelOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( scale_dims[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension value of Input(Scale) must be [%d]," "But received [%d].", C, @@ -104,7 +104,7 @@ class AffineChannelOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( b_dims[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension value of Input(Bias) must be [%d]," "But received [%d].", C, diff --git a/paddle/fluid/operators/affine_channel_op_xpu.cc b/paddle/fluid/operators/affine_channel_op_xpu.cc index 799bb87cf9892..9024dab8f98c2 100644 --- a/paddle/fluid/operators/affine_channel_op_xpu.cc +++ b/paddle/fluid/operators/affine_channel_op_xpu.cc @@ -70,7 +70,7 @@ class AffineChannelXPUKernel : public framework::OpKernel { dev_ctx.x_context(), x_d, scale_d, y_d, x_shape, b_shape); PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, - platform::errors::External( + phi::errors::External( "The broadcast_mul XPU OP return wrong value[%d %s]", r, XPUAPIErrorMsg[r])); @@ -78,7 
+78,7 @@ class AffineChannelXPUKernel : public framework::OpKernel { dev_ctx.x_context(), y_d, bias_d, y_d, x_shape, b_shape); PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, - platform::errors::External( + phi::errors::External( "The broadcast_add XPU OP return wrong value[%d %s]", r, XPUAPIErrorMsg[r])); @@ -140,28 +140,28 @@ class AffineChannelGradXPUKernel : public framework::OpKernel { dev_ctx.x_context(), dy_d, dbias_d, x_shape, rdims); PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, - platform::errors::External( + phi::errors::External( "The reduce_sum XPU OP return wrong value[%d %s]", r, XPUAPIErrorMsg[r])); xpu::ctx_guard RAII_GUARD(dev_ctx.x_context()); T* tmp = RAII_GUARD.alloc_l3_or_gm(dy->numel()); PADDLE_ENFORCE_NOT_NULL( - tmp, platform::errors::External("XPU has no enough memory")); + tmp, phi::errors::External("XPU has no enough memory")); r = xpu::mul( dev_ctx.x_context(), dy_d, x->data(), tmp, dy->numel()); PADDLE_ENFORCE_EQ( r, xpu::Error_t::SUCCESS, - platform::errors::External("The mul XPU OP return wrong value[%d %s]", - r, - XPUAPIErrorMsg[r])); + phi::errors::External("The mul XPU OP return wrong value[%d %s]", + r, + XPUAPIErrorMsg[r])); r = xpu::reduce_sum( dev_ctx.x_context(), tmp, dscale_d, x_shape, rdims); PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, - platform::errors::External( + phi::errors::External( "The reduce_sum XPU OP return wrong value[%d %s]", r, XPUAPIErrorMsg[r])); @@ -172,7 +172,7 @@ class AffineChannelGradXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( r, xpu::Error_t::SUCCESS, - platform::errors::External( + phi::errors::External( "The broadcast_mul XPU OP return wrong value[%d %s]", r, XPUAPIErrorMsg[r])); diff --git a/paddle/fluid/operators/array_operator.h b/paddle/fluid/operators/array_operator.h index c7b8ce3f381d1..3d8a08b6725f1 100644 --- a/paddle/fluid/operators/array_operator.h +++ b/paddle/fluid/operators/array_operator.h @@ -34,16 +34,15 @@ class ArrayOp : public framework::OperatorBase { size_t 
GetOffset(const framework::Scope &scope, const platform::Place &place) const { auto *i = scope.FindVar(Input("I")); - PADDLE_ENFORCE_NOT_NULL( - i, platform::errors::NotFound("Input(I) is not found.")); + PADDLE_ENFORCE_NOT_NULL(i, phi::errors::NotFound("Input(I) is not found.")); auto &i_tensor = i->Get(); - PADDLE_ENFORCE_EQ(i_tensor.numel(), - 1, - platform::errors::InvalidArgument( - "Input(I) must have numel 1. " - "But received %d, and it's shape is [%s].", - i_tensor.numel(), - i_tensor.dims())); + PADDLE_ENFORCE_EQ( + i_tensor.numel(), + 1, + phi::errors::InvalidArgument("Input(I) must have numel 1. " + "But received %d, and it's shape is [%s].", + i_tensor.numel(), + i_tensor.dims())); // get device context from pool platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); diff --git a/paddle/fluid/operators/array_to_lod_tensor_op.cc b/paddle/fluid/operators/array_to_lod_tensor_op.cc index 275dc6a99d63e..fae4ecbf9eb2b 100644 --- a/paddle/fluid/operators/array_to_lod_tensor_op.cc +++ b/paddle/fluid/operators/array_to_lod_tensor_op.cc @@ -59,7 +59,7 @@ struct ArrayToLoDFunctor { Apply(static_cast(pool.Get(place))); #else PADDLE_THROW( - platform::errors::Unavailable("Paddle is not compiled with CUDA.")); + phi::errors::Unavailable("Paddle is not compiled with CUDA.")); #endif } } @@ -101,7 +101,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { // dim PADDLE_ENFORCE_EQ(x.empty(), false, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "There's no element in the input array.")); int rank = x[0].dims().size(); platform::Place place = x[0].place(); @@ -116,7 +116,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( ins_i_dims, ins_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension of the %zu'th element in LoDTensorArray " "differs from previous ones." 
"The current dimension is %d, and the previous dimension is %d.", @@ -126,7 +126,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( x[i].place(), place, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The place class of the %zu'th element in LoDTensorArray " "differs from previous ones." "The current place is %d, and the previous place is %d.", @@ -136,7 +136,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( x[i].dtype(), data_type, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The date type of the %zu'th element in LoDTensorArray " "differs from previous ones." "The current data type is %d, and the previous data type is %d.", @@ -172,7 +172,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { cur_level_lod.push_back(cur_level_lod.back() + table_items[idx].length); PADDLE_ENFORCE_LE(table_items[idx].length, x.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The RankTable items length should less than or " "equal to Input(X) size," "but receive TankTable items length is %d , longer " @@ -194,7 +194,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { PADDLE_ENFORCE_GE( end_offset, start_offset, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The lod data start offset should smaller or equal to the end " "offset," "but the start offset is %d, larger than end offset %d.", @@ -243,11 +243,11 @@ class ArrayToLoDTensorInferShape : public framework::InferShapeBase { PADDLE_ENFORCE_EQ( context->HasInput("X"), true, - platform::errors::NotFound("Input(X) of BmmOp should not be null.")); - PADDLE_ENFORCE_EQ(context->HasInput("RankTable"), - true, - platform::errors::NotFound( - "Input(RankTable) of BmmOp should not be null.")); + phi::errors::NotFound("Input(X) of BmmOp should not be null.")); + PADDLE_ENFORCE_EQ( + context->HasInput("RankTable"), + true, + 
phi::errors::NotFound("Input(RankTable) of BmmOp should not be null.")); // For compile-time, the first dim of input X and output Out should be -1. // For runtime, the first dim of output Out should be the sum of all // elements's first dim in input X. The output's dims will be re-computed in diff --git a/paddle/fluid/operators/assert_op.cc b/paddle/fluid/operators/assert_op.cc index 4ab60914908da..5a9fb09d44807 100644 --- a/paddle/fluid/operators/assert_op.cc +++ b/paddle/fluid/operators/assert_op.cc @@ -56,14 +56,14 @@ class AssertOp : public framework::OperatorBase { const platform::Place &dev_place) const override { const framework::Variable *cond_var_ptr = scope.FindVar(Input(kCond.data())); - PADDLE_ENFORCE_NOT_NULL(cond_var_ptr, - platform::errors::NotFound( - "Input(Condition) of AssertOp is not found.")); + PADDLE_ENFORCE_NOT_NULL( + cond_var_ptr, + phi::errors::NotFound("Input(Condition) of AssertOp is not found.")); const phi::DenseTensor &cond = cond_var_ptr->Get(); PADDLE_ENFORCE_EQ( cond.numel(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The numel of Input(Condition) of AssertOp must be 1. 
But now " "the Condition's shape is %s.", cond.dims().to_str())); @@ -83,7 +83,7 @@ class AssertOp : public framework::OperatorBase { formatter.Print(x_tensor, name); } - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "The condition variable '%s' of AssertOp must be " "true, but received false", Input(kCond.data()))); diff --git a/paddle/fluid/operators/assign_op.h b/paddle/fluid/operators/assign_op.h index 6efc621120929..36cee420f0c36 100644 --- a/paddle/fluid/operators/assign_op.h +++ b/paddle/fluid/operators/assign_op.h @@ -64,7 +64,7 @@ class AssignFunctor { PADDLE_ENFORCE_EQ( true, false, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "Not support type for assign op with type %s", typeid(T).name())); } diff --git a/paddle/fluid/operators/assign_pos_op.cc b/paddle/fluid/operators/assign_pos_op.cc index 66c453885e4a9..7def3a0cac503 100644 --- a/paddle/fluid/operators/assign_pos_op.cc +++ b/paddle/fluid/operators/assign_pos_op.cc @@ -41,11 +41,11 @@ class AssignPosOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(cum_count_dtype, X_dtype, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dtype of the cum_count and X should be same")); PADDLE_ENFORCE_EQ(cum_count_dtype, framework::proto::VarType::INT64, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dtype of the cum_count_dtype, eff_num_len and " "X should be same as int64")); return phi::KernelKey(cum_count_dtype, ctx.device_context().GetPlace()); diff --git a/paddle/fluid/operators/assign_value_op.h b/paddle/fluid/operators/assign_value_op.h index 5ba8b9367e64e..d147575773c06 100644 --- a/paddle/fluid/operators/assign_value_op.h +++ b/paddle/fluid/operators/assign_value_op.h @@ -119,7 +119,7 @@ class AssignValueKernel : public framework::OpKernel { value_name = "int8_values"; break; default: - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( 
"Unsupported data type(code %d) for AssignValue operator, only " "supports bool, int32, float32, float64, int8 and int64.", dtype)); diff --git a/paddle/fluid/operators/attention_lstm_op.cc b/paddle/fluid/operators/attention_lstm_op.cc index 6a0775e6331a7..3a5b50a7906e5 100644 --- a/paddle/fluid/operators/attention_lstm_op.cc +++ b/paddle/fluid/operators/attention_lstm_op.cc @@ -54,7 +54,7 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { const int M = static_cast(x_dims[1]); PADDLE_ENFORCE_EQ(x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expected input(X)'s dimension is 2. But received %d.", x_dims.size())); @@ -63,39 +63,39 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( w_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expected input(LSTMWeight)'s dimension is 2.But received %d.", w_dims.size())); PADDLE_ENFORCE_EQ( w_dims[0], D + M, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "LSTMWeight dims should be (%d + %d) * %d.", D, M, 4 * D)); auto b_dims = ctx->GetInputDim("LSTMBias"); PADDLE_ENFORCE_EQ( b_dims.size(), 2, - platform::errors::InvalidArgument("Input(LSTMBias)'s rank must be 2.")); - PADDLE_ENFORCE_EQ(b_dims[0], - 1, - platform::errors::InvalidArgument( - "LSTMBias dims should be 1 x %d.", 4 * D)); - PADDLE_ENFORCE_EQ(b_dims[1], - 4 * D, - platform::errors::InvalidArgument( - "LSTMBias dims should be 1 x %d.", 4 * D)); + phi::errors::InvalidArgument("Input(LSTMBias)'s rank must be 2.")); + PADDLE_ENFORCE_EQ( + b_dims[0], + 1, + phi::errors::InvalidArgument("LSTMBias dims should be 1 x %d.", 4 * D)); + PADDLE_ENFORCE_EQ( + b_dims[1], + 4 * D, + phi::errors::InvalidArgument("LSTMBias dims should be 1 x %d.", 4 * D)); auto c_dims = ctx->GetInputDim("C0"); PADDLE_ENFORCE_EQ( c_dims.size(), 2, - platform::errors::InvalidArgument("Input(C0)'s rank must be 2.")); + 
phi::errors::InvalidArgument("Input(C0)'s rank must be 2.")); if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ( c_dims[1], D, - platform::errors::InvalidArgument("C0 dims should be N x %d.", D)); + phi::errors::InvalidArgument("C0 dims should be N x %d.", D)); } if (ctx->HasInput("H0")) { @@ -103,27 +103,27 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( h_dims.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expected input(H0)'s dimension is 2. But received %d.", h_dims.size())); if (ctx->IsRuntime() || (common::product(c_dims) > 0 && common::product(h_dims) > 0)) { PADDLE_ENFORCE_EQ(h_dims, c_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension of Input(H0) and Input(C0) " "should be the same.")); } } auto atten_w_dims = ctx->GetInputDim("AttentionWeight"); - PADDLE_ENFORCE_EQ(atten_w_dims.size(), - 2, - platform::errors::InvalidArgument( - "Input(AttentionWeight)'s rank must be 2.")); + PADDLE_ENFORCE_EQ( + atten_w_dims.size(), + 2, + phi::errors::InvalidArgument("Input(AttentionWeight)'s rank must be 2.")); PADDLE_ENFORCE_EQ(atten_w_dims[0], M + D, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expected `AttentionWeight` shape is [(%d + %d), 1]. 
" "But received shape = [%d, 1], shape[0] is not %d.", M, @@ -132,39 +132,39 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { M + D)); PADDLE_ENFORCE_EQ(atten_w_dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "AttentionWeight shapes must be (%d + %d) * 1.", M, D)); if (ctx->HasInput("AttentionBias")) { auto atten_b_dims = ctx->GetInputDim("AttentionBias"); - PADDLE_ENFORCE_EQ(atten_b_dims.size(), - 2, - platform::errors::InvalidArgument( - "Input(AttentionBias)'s rank must be 2.")); - PADDLE_ENFORCE_EQ(atten_b_dims[0], - 1, - platform::errors::InvalidArgument( - "AttentionBias shapes must be 1 * 1.")); - PADDLE_ENFORCE_EQ(atten_b_dims[1], - 1, - platform::errors::InvalidArgument( - "AttentionBias shapes must be 1 * 1.")); + PADDLE_ENFORCE_EQ( + atten_b_dims.size(), + 2, + phi::errors::InvalidArgument("Input(AttentionBias)'s rank must be 2.")); + PADDLE_ENFORCE_EQ( + atten_b_dims[0], + 1, + phi::errors::InvalidArgument("AttentionBias shapes must be 1 * 1.")); + PADDLE_ENFORCE_EQ( + atten_b_dims[1], + 1, + phi::errors::InvalidArgument("AttentionBias shapes must be 1 * 1.")); } if (ctx->HasInput("AttentionScalar")) { auto dims = ctx->GetInputDim("AttentionScalar"); PADDLE_ENFORCE_EQ(dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(AttentionScalar)'s rank must be 2.")); - PADDLE_ENFORCE_EQ(dims[0], - 1, - platform::errors::InvalidArgument( - "AttentionScalar shapes must be 1 * 1.")); - PADDLE_ENFORCE_EQ(dims[1], - 1, - platform::errors::InvalidArgument( - "AttentionScalar shapes must be 1 * 1.")); + PADDLE_ENFORCE_EQ( + dims[0], + 1, + phi::errors::InvalidArgument("AttentionScalar shapes must be 1 * 1.")); + PADDLE_ENFORCE_EQ( + dims[1], + 1, + phi::errors::InvalidArgument("AttentionScalar shapes must be 1 * 1.")); } if (ctx->HasInput("AttentionScalarBias")) { @@ -175,15 +175,15 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { 
"AttentionLstm"); PADDLE_ENFORCE_EQ(dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(AttentionScalarBias)'s rank must be 2.")); PADDLE_ENFORCE_EQ(dims[0], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "AttentionScalarBias shapes must be 1 * 1.")); PADDLE_ENFORCE_EQ(dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "AttentionScalarBias shapes must be 1 * 1.")); } @@ -381,11 +381,11 @@ class AttentionLSTMKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( x_lod.size(), 1UL, - platform::errors::InvalidArgument("Input(X)'s lod size must be 1.")); + phi::errors::InvalidArgument("Input(X)'s lod size must be 1.")); PADDLE_ENFORCE_EQ( c0->dims()[0], N, - platform::errors::InvalidArgument("C0 dims should be %d x %d.", N, D)); + phi::errors::InvalidArgument("C0 dims should be %d x %d.", N, D)); fc_out->Resize({max_seq_len, 1}); std::function act_gate, act_cell, act_cand; diff --git a/paddle/fluid/operators/batch_fc_op.cc b/paddle/fluid/operators/batch_fc_op.cc index 706cb17e40f34..2eea44e05b057 100644 --- a/paddle/fluid/operators/batch_fc_op.cc +++ b/paddle/fluid/operators/batch_fc_op.cc @@ -27,49 +27,49 @@ class BatchFCOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("Input"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "X(Input) of Batch Fully Connected should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Out(Output) of Batch Fully Connected should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("W"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "W(Input) of Batch Fully Connected should not be null.")); auto input_dims = ctx->GetInputDim("Input"); auto w_dims = ctx->GetInputDim("W"); - PADDLE_ENFORCE_EQ(input_dims.size(), - 3, - platform::errors::InvalidArgument( - "Input of BatchFCOp should 
have 3D.")); + PADDLE_ENFORCE_EQ( + input_dims.size(), + 3, + phi::errors::InvalidArgument("Input of BatchFCOp should have 3D.")); PADDLE_ENFORCE_EQ( w_dims.size(), 3, - platform::errors::InvalidArgument("W of BatchFCOp should have 3D.")); + phi::errors::InvalidArgument("W of BatchFCOp should have 3D.")); PADDLE_ENFORCE_EQ( input_dims[0], w_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input.dim[0] and W.dim[0] of BatchFCOp should be same.")); PADDLE_ENFORCE_EQ( input_dims[2], w_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input.dim[2] and W.dim[1] of BatchFCOp should be same.")); auto bias_dims = ctx->GetInputDim("Bias"); PADDLE_ENFORCE_EQ(bias_dims[0], input_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Bias.dim[0] should be same as input.dim[0].")); PADDLE_ENFORCE_EQ(bias_dims[1], w_dims[2], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Bias.dim[1] should be same as input.dim[2].")); ctx->SetOutputDim("Out", {input_dims[0], input_dims[1], w_dims[2]}); @@ -89,14 +89,13 @@ class BatchFCGradOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("Input"), - true, - platform::errors::InvalidArgument("Input should not be null")); + PADDLE_ENFORCE_EQ(ctx->HasInput("Input"), + true, + phi::errors::InvalidArgument("Input should not be null")); PADDLE_ENFORCE_EQ( ctx->HasInput("W"), true, - platform::errors::InvalidArgument("Input(W) should not be null")); + phi::errors::InvalidArgument("Input(W) should not be null")); ctx->SetOutputDim(framework::GradVarName("Input"), ctx->GetInputDim("Input")); diff --git a/paddle/fluid/operators/batch_fc_op.h b/paddle/fluid/operators/batch_fc_op.h index ca8c22243dbe4..5db142d5da6ba 100644 --- a/paddle/fluid/operators/batch_fc_op.h +++ 
b/paddle/fluid/operators/batch_fc_op.h @@ -26,7 +26,7 @@ class BatchFCKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::Unimplemented("BatchFC only supports GPU now.")); + phi::errors::Unimplemented("BatchFC only supports GPU now.")); } }; } // namespace operators diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index 996c6af070631..31a21f2138e6f 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/fluid/framework/data_layout.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif #include "paddle/fluid/prim/api/composite_backward/composite_backward_api.h" @@ -57,12 +57,12 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const { // make sure Mean/MeanOut and Variance/VarianceOut share memory in Python PADDLE_ENFORCE_EQ(ctx->Inputs("Mean")[0], ctx->Outputs("MeanOut")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Mean and MeanOut should share the same memory")); PADDLE_ENFORCE_EQ( ctx->Inputs("Variance")[0], ctx->Outputs("VarianceOut")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variance and VarianceOut should share the same memory")); const auto x_dims = ctx->GetInputDim("X"); @@ -71,7 +71,7 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_EQ( (x_dims[i] == -1) || (x_dims[i] > 0), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Each dimension of input tensor is expected to be -1 or a " "positive number, but received %d. 
Input's shape is [%s].", x_dims[i], @@ -85,7 +85,7 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const { auto mom = ctx->Inputs("MomentumTensor"); PADDLE_ENFORCE_EQ(mom.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor MomentumTensor's size must be 1" "But received: MomentumTensor's size is [%d]", mom.size())); @@ -94,7 +94,7 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_GE( x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the dimension of input " "X must greater than or equal to 2. But received: the shape of input " "X = [%s], the dimension of input X =[%d]", @@ -103,7 +103,7 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_LE( x_dims.size(), 5, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the dimension of input X " "must smaller than or equal to 5. But received: the shape of input X " "= [%s], the dimension of input X = [%d]", @@ -121,7 +121,7 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_EQ( scale_dim.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the dimension of scale must equal to 1." "But received: the shape of scale is [%s], the dimension " "of scale is [%d]", @@ -134,7 +134,7 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_EQ( bias_dim.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the dimension of bias must equal to 1." 
"But received: the shape of bias is [%s],the dimension " "of bias is [%d]", @@ -153,14 +153,14 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const { if (check) { PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale")[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the shape of scale must equal to [%d]" "But received: the shape of scale is [%d]", C, ctx->GetInputDim("Scale")[0])); PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias")[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the shape of bias must equal to [%d]" "But received: the shape of bias is [%d]", C, @@ -191,29 +191,29 @@ phi::KernelKey BatchNormOp::GetExpectedKernelType( bn_param_type = framework::proto::VarType::FP64; } if (ctx.HasInput("Scale")) { - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Scale")->dtype()), - platform::errors::InvalidArgument( - "Scale input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Scale")->dtype()), + phi::errors::InvalidArgument("Scale input should be of float type")); } if (ctx.HasInput("Bias")) { - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Bias")->dtype()), - platform::errors::InvalidArgument( - "Bias input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Bias")->dtype()), + phi::errors::InvalidArgument("Bias input should be of float type")); } PADDLE_ENFORCE_EQ( bn_param_type, framework::TransToProtoVarType( ctx.Input("Mean")->dtype()), - platform::errors::InvalidArgument("Mean input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Variance")->dtype()), - platform::errors::InvalidArgument( - "Variance input should be of float type")); + phi::errors::InvalidArgument("Mean input should be of float type")); + PADDLE_ENFORCE_EQ( + 
bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Variance")->dtype()), + phi::errors::InvalidArgument("Variance input should be of float type")); return phi::KernelKey(input_data_type, ctx.GetPlace()); } @@ -254,11 +254,11 @@ void BatchNormOpMaker::Make() { PADDLE_ENFORCE_GE( epsilon, 0.0f, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'epsilon' should be greater or equal than 0.0.")); PADDLE_ENFORCE_LE(epsilon, 0.001f, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'epsilon' should be less or equal than 0.001.")); }); AddAttr("data_layout", "").SetDefault("NCHW"); @@ -349,7 +349,7 @@ void BatchNormGradOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_EQ((has_scale_grad == has_bias_grad), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(Scale@GRAD) and Output(Bias@GRAD) must be null " "or not be null at same time. But now, " "has Scale@Grad=[%d], has Bias@GRAD=[%d]", @@ -361,7 +361,7 @@ void BatchNormGradOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_EQ( !ctx->Attrs().Get("use_mkldnn"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Using global stats during training is not supported " "in oneDNN version of batch_norm_gradient kernel now.")); } @@ -391,7 +391,7 @@ phi::KernelKey BatchNormGradOp::GetExpectedKernelType( const auto *var = ctx.InputVar(framework::GradVarName("Y")); if (var == nullptr) { PADDLE_THROW( - platform::errors::InvalidArgument("can't find gradient variable of Y")); + phi::errors::InvalidArgument("can't find gradient variable of Y")); } const phi::DenseTensor *t = nullptr; if (var->IsType()) { @@ -399,7 +399,7 @@ phi::KernelKey BatchNormGradOp::GetExpectedKernelType( } if (t == nullptr) { PADDLE_THROW( - platform::errors::InvalidArgument("gradient variable of Y is empty")); + phi::errors::InvalidArgument("gradient variable of Y is empty")); } auto data_type = 
OperatorWithKernel::IndicateVarDataType(ctx, "X"); @@ -532,8 +532,7 @@ phi::KernelKey BatchNormDoubleGradOp::GetExpectedKernelType( const framework::ExecutionContext &ctx) const { const auto *var = ctx.InputVar("DY"); if (var == nullptr) { - PADDLE_THROW( - platform::errors::NotFound("cannot find gradient variable of Y")); + PADDLE_THROW(phi::errors::NotFound("cannot find gradient variable of Y")); } const phi::DenseTensor *t = nullptr; if (var->IsType()) { @@ -541,7 +540,7 @@ phi::KernelKey BatchNormDoubleGradOp::GetExpectedKernelType( } if (t == nullptr) { PADDLE_THROW( - platform::errors::InvalidArgument("gradient variable of Y is empty")); + phi::errors::InvalidArgument("gradient variable of Y is empty")); } return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); diff --git a/paddle/fluid/operators/batch_norm_op.cu b/paddle/fluid/operators/batch_norm_op.cu index cc013bd0b406e..9d48d7858f41a 100644 --- a/paddle/fluid/operators/batch_norm_op.cu +++ b/paddle/fluid/operators/batch_norm_op.cu @@ -26,8 +26,8 @@ namespace cub = hipcub; #include "paddle/common/flags.h" #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/operators/batch_norm_op.h" -#include "paddle/fluid/platform/float16.h" #include "paddle/phi/backends/gpu/gpu_dnn.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/kernels/funcs/math_function.h" COMMON_DECLARE_bool(cudnn_batchnorm_spatial_persistent); diff --git a/paddle/fluid/operators/beam_search_decode_op.cc b/paddle/fluid/operators/beam_search_decode_op.cc index ec3ced614bd92..50a69e6390302 100644 --- a/paddle/fluid/operators/beam_search_decode_op.cc +++ b/paddle/fluid/operators/beam_search_decode_op.cc @@ -114,6 +114,6 @@ PD_REGISTER_STRUCT_KERNEL(beam_search_decode, ops::BeamSearchDecodeOpKernel, float, double, - paddle::platform::float16, + phi::dtype::float16, int, int64_t) {} diff --git a/paddle/fluid/operators/beam_search_decode_op.cu.cc 
b/paddle/fluid/operators/beam_search_decode_op.cu.cc index bab5423c99b05..beeb13725c6b1 100644 --- a/paddle/fluid/operators/beam_search_decode_op.cu.cc +++ b/paddle/fluid/operators/beam_search_decode_op.cu.cc @@ -23,6 +23,6 @@ PD_REGISTER_STRUCT_KERNEL(beam_search_decode, ops::BeamSearchDecodeOpKernel, float, double, - paddle::platform::float16, + phi::dtype::float16, int, int64_t) {} diff --git a/paddle/fluid/operators/beam_search_decode_op.h b/paddle/fluid/operators/beam_search_decode_op.h index 7347e228780b4..99735e98276e6 100644 --- a/paddle/fluid/operators/beam_search_decode_op.h +++ b/paddle/fluid/operators/beam_search_decode_op.h @@ -85,7 +85,7 @@ struct BeamSearchDecodeFunctor { template void apply_mix() const { if (std::is_same::value) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "beam search decode op does not support bool!")); } else { @@ -125,7 +125,7 @@ class BeamSearchDecodeOpKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( step_num, 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "beam search steps, which is the" "size of Input(Ids) LoDTensorArray. beam search steps should " "be larger than 0, but received %d. ", @@ -134,7 +134,7 @@ class BeamSearchDecodeOpKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( source_num, 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "source_num is the sequence number of the" "first decoding step, indicating by Input(Ids)[0].lod[0].size. " "The number of source_num should be larger than" @@ -145,7 +145,7 @@ class BeamSearchDecodeOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( ids->at(i).lod().size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "For the i step in beam search steps," "the size of Input(Ids)[i].lod() should larger than 2," "but received %d. 
", diff --git a/paddle/fluid/operators/beam_search_decode_op_def.h b/paddle/fluid/operators/beam_search_decode_op_def.h index d358d8255fcf3..ff16e093e0bf5 100644 --- a/paddle/fluid/operators/beam_search_decode_op_def.h +++ b/paddle/fluid/operators/beam_search_decode_op_def.h @@ -90,7 +90,7 @@ void BeamSearchDecoder::ConvertSentenceVectorToLodTensor( PADDLE_ENFORCE_NE( src_num, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "src_num is the sequence number of the first decoding step" ", indicating by Input(Ids)[0].lod[0].size." "src_num has wrong value." @@ -162,12 +162,12 @@ void BeamSearchDecoder::Backtrace(const LoDTensorArray& step_ids, PADDLE_ENFORCE_NE( step_ids.empty(), true, - platform::errors::InvalidArgument("Input(Ids) should not be empty." - "But the Input(Ids) is empty.")); + phi::errors::InvalidArgument("Input(Ids) should not be empty." + "But the Input(Ids) is empty.")); PADDLE_ENFORCE_EQ( step_ids.size(), step_scores.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of Input(Ids) and Input(Scores) should be " "the same. But the size of Input(Ids) and Input(Scores) " "are not equal.")); diff --git a/paddle/fluid/operators/beam_search_decode_op_xpu.cc b/paddle/fluid/operators/beam_search_decode_op_xpu.cc index 5fd2b2fc6fa35..c438070ce07f9 100644 --- a/paddle/fluid/operators/beam_search_decode_op_xpu.cc +++ b/paddle/fluid/operators/beam_search_decode_op_xpu.cc @@ -30,7 +30,7 @@ class BeamSearchDecodeXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( step_num, 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "beam search steps, which is the" "size of Input(Ids) LoDTensorArray. beam search steps should " "be larger than 0, but received %d. 
", @@ -40,7 +40,7 @@ class BeamSearchDecodeXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( source_num, 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "source_num is the sequence number of the" "first decoding step, indicating by Input(Ids)[0].lod[0].size. " "The number of source_num should be larger than" @@ -51,7 +51,7 @@ class BeamSearchDecodeXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( ids->at(i).lod().size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "For the i step in beam search steps," "the size of Input(Ids)[i].lod() should larger than 2," "but received %d. ", @@ -91,7 +91,7 @@ class BeamSearchDecodeXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( r, xpu::Error_t::SUCCESS, - platform::errors::External( + phi::errors::External( "Execute function CopyTensorByXPU failed by [%d]", r)); r = CopyTensorByType( @@ -99,7 +99,7 @@ class BeamSearchDecodeXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( r, xpu::Error_t::SUCCESS, - platform::errors::External( + phi::errors::External( "Execute function CopyTensorByXPU failed by [%d]", r)); sentenceIds_temp->set_lod(sentenceIds->lod()); sentenceScores_temp->set_lod(sentenceScores->lod()); @@ -119,7 +119,7 @@ PD_REGISTER_STRUCT_KERNEL(beam_search_decode, ops::BeamSearchDecodeXPUKernel, float, double, - plat::float16, + phi::dtype::float16, int, int64_t) {} #endif diff --git a/paddle/fluid/operators/beam_search_decode_op_xpu.h b/paddle/fluid/operators/beam_search_decode_op_xpu.h index 5e63627c6f88c..863b92e9f2b7d 100644 --- a/paddle/fluid/operators/beam_search_decode_op_xpu.h +++ b/paddle/fluid/operators/beam_search_decode_op_xpu.h @@ -45,7 +45,7 @@ int CopyTensorByXPU(const phi::DenseTensor& srcTensor, PADDLE_ENFORCE_EQ( r, xpu::Error_t::SUCCESS, - platform::errors::External("Execute function SetMeta failed by [%d]", r)); + phi::errors::External("Execute function SetMeta failed by [%d]", r)); if (flag == 0) { T* 
dstData = @@ -75,8 +75,7 @@ const int CopyTensorByType(const phi::DenseTensor& srcTensor, if (srcTensor.dtype() == phi::DataType::FLOAT32) r = CopyTensorByXPU(srcTensor, dstTensor, flag, place); else if (srcTensor.dtype() == phi::DataType::FLOAT16) - r = CopyTensorByXPU( - srcTensor, dstTensor, flag, place); + r = CopyTensorByXPU(srcTensor, dstTensor, flag, place); else if (srcTensor.dtype() == phi::DataType::FLOAT64) r = CopyTensorByXPU(srcTensor, dstTensor, flag, place); else if (srcTensor.dtype() == phi::DataType::INT32) @@ -88,7 +87,7 @@ const int CopyTensorByType(const phi::DenseTensor& srcTensor, PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, - platform::errors::External( + phi::errors::External( "Execute function CopyTensorByXPU failed by [%d]", r)); return xpu::Error_t::SUCCESS; @@ -117,7 +116,7 @@ struct BeamSearchDecodeXPUFunctor { PADDLE_ENFORCE_EQ( r, xpu::Error_t::SUCCESS, - platform::errors::External( + phi::errors::External( "Execute function CopyTensorByXPU failed by [%d]", r)); } @@ -135,7 +134,7 @@ struct BeamSearchDecodeXPUFunctor { PADDLE_ENFORCE_EQ( r, xpu::Error_t::SUCCESS, - platform::errors::External( + phi::errors::External( "Execute function CopyTensorByType failed by [%d]", r)); } @@ -148,7 +147,7 @@ struct BeamSearchDecodeXPUFunctor { template void apply_xpu() const { if (std::is_same::value) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "beam search decode op does not support bool!")); } else { BeamSearchDecoder beam_search_decoder(beam_size_, end_id_); diff --git a/paddle/fluid/operators/beam_search_op.h b/paddle/fluid/operators/beam_search_op.h index fea706bb54a93..0beeb0cc407fe 100644 --- a/paddle/fluid/operators/beam_search_op.h +++ b/paddle/fluid/operators/beam_search_op.h @@ -29,16 +29,15 @@ class BeamSearchOpKernel : public framework::OpKernel { auto* pre_ids = context.Input("pre_ids"); auto* pre_scores = context.Input("pre_scores"); - PADDLE_ENFORCE_NOT_NULL(scores, - 
platform::errors::NotFound( - "Input(scores) of BeamSearchOp is not found.")); + PADDLE_ENFORCE_NOT_NULL( + scores, + phi::errors::NotFound("Input(scores) of BeamSearchOp is not found.")); PADDLE_ENFORCE_NOT_NULL( pre_ids, - platform::errors::NotFound( - "Input(pre_ids) of BeamSearchOp is not found.")); + phi::errors::NotFound("Input(pre_ids) of BeamSearchOp is not found.")); PADDLE_ENFORCE_NOT_NULL( pre_scores, - platform::errors::NotFound( + phi::errors::NotFound( "Input(pre_scores) of BeamSearchOp is not found.")); size_t level = context.Attr("level"); @@ -51,11 +50,11 @@ class BeamSearchOpKernel : public framework::OpKernel { auto* parent_idx = context.Output("parent_idx"); PADDLE_ENFORCE_NOT_NULL( selected_ids, - platform::errors::NotFound( + phi::errors::NotFound( "Output(selected_ids) of BeamSearchOp is not found.")); PADDLE_ENFORCE_NOT_NULL( selected_scores, - platform::errors::NotFound( + phi::errors::NotFound( "Output(selected_scores) of BeamSearchOp is not found.")); math::BeamSearchFunctor alg; diff --git a/paddle/fluid/operators/bernoulli_op.h b/paddle/fluid/operators/bernoulli_op.h index ffa2722ccbb60..f5ca225a49d26 100644 --- a/paddle/fluid/operators/bernoulli_op.h +++ b/paddle/fluid/operators/bernoulli_op.h @@ -25,14 +25,14 @@ namespace operators { template inline HOSTDEVICE T BernoulliFunctor(T p, T rand) { - PADDLE_ENFORCE_LE(p, - 1.0, - platform::errors::OutOfRange( - "The probability should be <= 1, but got %f", p)); - PADDLE_ENFORCE_GE(p, - 0.0, - platform::errors::OutOfRange( - "The probability should be >= 0, but got %f", p)); + PADDLE_ENFORCE_LE( + p, + 1.0, + phi::errors::OutOfRange("The probability should be <= 1, but got %f", p)); + PADDLE_ENFORCE_GE( + p, + 0.0, + phi::errors::OutOfRange("The probability should be >= 0, but got %f", p)); return static_cast(rand < p); } diff --git a/paddle/fluid/operators/bilateral_slice_op.cc b/paddle/fluid/operators/bilateral_slice_op.cc index 111f128fc3cc6..1b4624e3594f7 100644 --- 
a/paddle/fluid/operators/bilateral_slice_op.cc +++ b/paddle/fluid/operators/bilateral_slice_op.cc @@ -37,7 +37,7 @@ class BilateralSliceOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( dim_x.size(), 4, - platform::errors::Unimplemented( + phi::errors::Unimplemented( "Input(X) dimension must be 4, but got dimension = %d .", dim_x.size())); @@ -58,7 +58,7 @@ class BilateralSliceOp : public framework::OperatorWithKernel { if (has_offset) { PADDLE_ENFORCE_EQ((coeffs_chans % (input_chans + 1)), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Slicing with affine offset, coefficients grid " "should have n_out*(n_in+1) channels, but got %d", coeffs_chans)); @@ -67,7 +67,7 @@ class BilateralSliceOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( (coeffs_chans % input_chans), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Slicing without affine offset, coefficients grid " "should have n_out*n_in channels, but got %d .", coeffs_chans)); @@ -179,10 +179,10 @@ template class BilateralSliceKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), - true, - platform::errors::Unimplemented( - "BilateralSlice only supports GPU now.")); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), + true, + phi::errors::Unimplemented("BilateralSlice only supports GPU now.")); } }; diff --git a/paddle/fluid/operators/cast_op.cc b/paddle/fluid/operators/cast_op.cc index b83ebc6f899ef..6b587aba3dbf1 100644 --- a/paddle/fluid/operators/cast_op.cc +++ b/paddle/fluid/operators/cast_op.cc @@ -17,7 +17,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/unary.h" @@ -94,7 +94,7 @@ class CastOp : public framework::OperatorWithKernel { auto *tensor = ctx.Input("X"); PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The tensor of Input(X) is not initialized.")); auto &tensor_place = tensor->place(); // NOTE: cuda pinned tensor need to copy its data to target place diff --git a/paddle/fluid/operators/chunk_eval_op.cc b/paddle/fluid/operators/chunk_eval_op.cc index 5e95d0cdda3f8..1d2ebec27334c 100644 --- a/paddle/fluid/operators/chunk_eval_op.cc +++ b/paddle/fluid/operators/chunk_eval_op.cc @@ -53,7 +53,7 @@ class ChunkEvalOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( inference_dim, label_dim, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Inference)'s shape must be the same as Input(Label)'s " "shape, but received [%s] (Inference) vs [%s] (Label).", inference_dim, @@ -65,7 +65,7 @@ class ChunkEvalOp : public framework::OperatorWithKernel { (inference_dim.size() == 3 && inference_dim[2] == 1) || inference_dim.size() == 2, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "when Input(SeqLength) is provided, Input(Inference) " "should be of dim 3 (batch_size, bucket, 1) or dim 2 " "(batch_size, bucket), but received [%s].", @@ -73,7 +73,7 @@ class ChunkEvalOp : public framework::OperatorWithKernel { auto seq_length_dim = ctx->GetInputDim("SeqLength"); PADDLE_ENFORCE_LE(seq_length_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(SeqLength)'s rank should not be greater " "than 2, but received %d.", seq_length_dim.size())); diff 
--git a/paddle/fluid/operators/chunk_eval_op.h b/paddle/fluid/operators/chunk_eval_op.h index baad8719db37f..4b146176a43bc 100644 --- a/paddle/fluid/operators/chunk_eval_op.h +++ b/paddle/fluid/operators/chunk_eval_op.h @@ -57,7 +57,7 @@ class ChunkEvalKernel : public framework::OpKernel { PADDLE_ENFORCE_LE( label[i], num_chunk_types * num_tag_types, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The value of Input(Label) should be less than the number of " "chunk types times the number of tag types, but received %d " "(Label) vs %d (chunk types) * %d (tag types).", @@ -178,7 +178,7 @@ class ChunkEvalKernel : public framework::OpKernel { tag_end = -1; tag_single = -1; } else { - PADDLE_THROW(platform::errors::InvalidArgument("Unknown chunk scheme.")); + PADDLE_THROW(phi::errors::InvalidArgument("Unknown chunk scheme.")); } other_chunk_type = num_chunk_types = context.Attr("num_chunk_types"); excluded_chunk_types.insert( @@ -244,13 +244,13 @@ class ChunkEvalKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( lod.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only support one level LoD sequence now, but received %d.", lod.size())); PADDLE_ENFORCE_EQ( lod, inference->lod(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Inference) and Input(Label) of Op(chunk_eval) should have " "same LoD information.")); num_sequences = lod[0].size() - 1; diff --git a/paddle/fluid/operators/cinn/cinn_instruction_run_op.h b/paddle/fluid/operators/cinn/cinn_instruction_run_op.h index 3a7779ae83338..766e8e6d2620d 100644 --- a/paddle/fluid/operators/cinn/cinn_instruction_run_op.h +++ b/paddle/fluid/operators/cinn/cinn_instruction_run_op.h @@ -45,12 +45,12 @@ class CinnInstructionRunOpKernel : public framework::OpKernel { CinnCompiler::GetInstance()->GetCompiledObject(cached_index); const std::vector>& instructions = compiled_object.runtime_program->GetRunInstructions(); - 
PADDLE_ENFORCE_LT(ins_index, - instructions.size(), - platform::errors::InvalidArgument( - "Index(%ld) > instructions.size(%ld).", - ins_index, - instructions.size())); + PADDLE_ENFORCE_LT( + ins_index, + instructions.size(), + phi::errors::InvalidArgument("Index(%ld) > instructions.size(%ld).", + ins_index, + instructions.size())); auto&& instruction = instructions.at(ins_index); // step 2: prepare the input and output arguments of the instruction diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.cc b/paddle/fluid/operators/cinn/cinn_launch_context.cc index 734987ce92235..aefc3f8111e54 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_context.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_context.cc @@ -179,7 +179,7 @@ void CinnLaunchContext::BuildVarNameMap( PADDLE_ENFORCE_EQ( res.second, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Cinn variable(%s) maps to more than one paddle variable(%s,%s)", x.second, res.first->second, @@ -198,7 +198,7 @@ void CinnLaunchContext::BuildVarNameMap( PADDLE_ENFORCE_EQ( paddle2cinn_varmap_.size(), cinn2paddle_varmap_.size(), - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Size of variables is not equal, paddle[%ld] vs cinn[%ld]", paddle2cinn_varmap_.size(), cinn2paddle_varmap_.size())); @@ -236,7 +236,7 @@ CinnTensor CinnLaunchContext::GetCinnTensorOfVar(const std::string& var_name) { PADDLE_ENFORCE_EQ( IsVariableUsed(var_name), true, - platform::errors::NotFound("Variable(%s) not applied in CINN", var_name)); + phi::errors::NotFound("Variable(%s) not applied in CINN", var_name)); const auto& arg_name = paddle2cinn_varmap_.at(var_name); return cinn_scope_->GetTensor(arg_name); } @@ -276,7 +276,7 @@ void CinnLaunchContext::CheckTensorEquivalent( const std::string& var_name, const phi::DenseTensor& paddle_tensor) { PADDLE_ENFORCE_EQ(IsVariableUsed(var_name), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable(%s) not applied 
in cinn", var_name)); // check dimension auto cinn_tensor = GetCinnTensorOfVar(var_name); @@ -309,7 +309,7 @@ void CinnLaunchContext::CheckTensorEquivalent( framework::paddle2cinn::TransToPaddleDataType(cinn_tensor->type()); PADDLE_ENFORCE_EQ(paddle_tensor.dtype(), cinn_dtype, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Tensors' dtype in variable(%s) are not equivalent, " "paddle is = [%s], but cinn is = [%s].", var_name, @@ -345,7 +345,7 @@ void CinnLaunchContext::InitializeArguments() { void CinnLaunchContext::AssignExternalVariable(const std::string& var_name) { PADDLE_ENFORCE_EQ(IsVariableUsed(var_name), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable(%s) not applied in cinn", var_name)); auto* cinn_buffer = GetCinnBufferOfVar(var_name); std::string revise_var_name = RedirectVarName(var_name); @@ -372,7 +372,7 @@ void CinnLaunchContext::AssignExternalVariable(const std::string& var_name) { void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) { PADDLE_ENFORCE_EQ(IsVariableUsed(var_name), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable(%s) not applied in cinn", var_name)); auto* cinn_buffer = GetCinnBufferOfVar(var_name); std::string revise_var_name = RedirectVarName(var_name); @@ -458,7 +458,7 @@ std::unique_ptr CinnLaunchContext::BuildCompiledProgram( PADDLE_ENFORCE_NE( res, cinn2paddle_varmap_.end(), - platform::errors::NotFound("Argument(%s) not found", arg)); + phi::errors::NotFound("Argument(%s) not found", arg)); var_names.emplace_back(res->second); } } @@ -592,8 +592,8 @@ cinn_buffer_t* CinnLaunchContext::GetCinnBufferOfVar( PADDLE_ENFORCE_NE( res, paddle2argument_.end(), - platform::errors::NotFound("Variable(%s) not found in compilation result", - var_name)); + phi::errors::NotFound("Variable(%s) not found in compilation result", + var_name)); return static_cast(res->second); } diff --git 
a/paddle/fluid/operators/cinn/cinn_launch_op.cc b/paddle/fluid/operators/cinn/cinn_launch_op.cc index 9edb7348b125c..a9154910acf11 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_op.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_op.cc @@ -41,7 +41,7 @@ const ::cinn::common::Target& PlaceToCinnTarget(const platform::Place& place) { return ::cinn::common::DefaultNVGPUTarget(); } - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "CINN is not supported on current place:%s", place)); return ::cinn::common::UnkTarget(); } diff --git a/paddle/fluid/operators/cinn/cinn_launch_op.h b/paddle/fluid/operators/cinn/cinn_launch_op.h index 2ce23dc965b31..4398c970f33ca 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_op.h +++ b/paddle/fluid/operators/cinn/cinn_launch_op.h @@ -78,7 +78,7 @@ class CinnLaunchOpKernel : public framework::OpKernel { // Step 1. Find graph object and prepare input PADDLE_ENFORCE_EQ(ctx.HasAttr(kCompilationKey), true, - platform::errors::NotFound( + phi::errors::NotFound( "No Attribute(%s) found for CinnLaunchOp operator.", kCompilationKey)); const auto& compilation_key = ctx.template Attr(kCompilationKey); diff --git a/paddle/fluid/operators/clip_by_norm_op.h b/paddle/fluid/operators/clip_by_norm_op.h index 3895bc09a08a0..4a61792c5b647 100644 --- a/paddle/fluid/operators/clip_by_norm_op.h +++ b/paddle/fluid/operators/clip_by_norm_op.h @@ -37,21 +37,21 @@ class ClipByNormOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of ClipByNormOp should not be null. Please " "check if it is created correctly.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of ClipByNormOp should not be null. 
" "Please check if it is created correctly.")); auto max_norm = ctx->Attrs().Get("max_norm"); PADDLE_ENFORCE_GT( max_norm, 0, - platform::errors::InvalidArgument("max_norm should be greater than 0. " - "Received max_norm is %f.", - max_norm)); + phi::errors::InvalidArgument("max_norm should be greater than 0. " + "Received max_norm is %f.", + max_norm)); auto x_dims = ctx->GetInputDim("X"); ctx->SetOutputDim("Out", x_dims); ctx->ShareLoD("X", /*->*/ "Out"); diff --git a/paddle/fluid/operators/collective/alltoall_op.cc b/paddle/fluid/operators/collective/alltoall_op.cc index a8e3b2808092c..bd99fdde2f2c2 100644 --- a/paddle/fluid/operators/collective/alltoall_op.cc +++ b/paddle/fluid/operators/collective/alltoall_op.cc @@ -28,7 +28,7 @@ class AllToAllBaseOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for alltoall op must be non-negative.", ring_id)); framework::DDim dim = ctx->GetInputDim("X"); if (dim[0] < 0) dim[0] = -1; @@ -79,4 +79,4 @@ PD_REGISTER_STRUCT_KERNEL(alltoall, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/alltoall_op.cu.cc b/paddle/fluid/operators/collective/alltoall_op.cu.cc index 8f942013435eb..93a44776851d4 100644 --- a/paddle/fluid/operators/collective/alltoall_op.cu.cc +++ b/paddle/fluid/operators/collective/alltoall_op.cu.cc @@ -45,7 +45,7 @@ class AllToAllOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for alltoall op must be non-negative.", ring_id)); auto place = ctx.GetPlace(); @@ -59,7 +59,7 @@ class AllToAllOpCUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to 
use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -70,7 +70,7 @@ class AllToAllOpCUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(ring_id))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -93,7 +93,7 @@ class AllToAllOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( x_dims[0] % nranks, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension size (%d) of the input tensor must be " "divisible by the number of ranks (%d).", x_dims[0], @@ -126,12 +126,11 @@ class AllToAllOpCUDAKernel : public framework::OpKernel { VLOG(3) << "old NCCLCommContext has rid " << ring_id; } #else - PADDLE_THROW( - platform::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); + PADDLE_THROW(phi::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); #endif #else PADDLE_THROW( - platform::errors::Unavailable("PaddlePaddle should compile with GPU.")); + phi::errors::Unavailable("PaddlePaddle should compile with GPU.")); #endif } }; @@ -153,5 +152,5 @@ PD_REGISTER_STRUCT_KERNEL(alltoall, #endif int, int64_t, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/alltoall_op.h b/paddle/fluid/operators/collective/alltoall_op.h index 61456c268d5d5..187d4965cdcc8 100644 --- a/paddle/fluid/operators/collective/alltoall_op.h +++ b/paddle/fluid/operators/collective/alltoall_op.h @@ -33,7 +33,7 @@ template class AllToAllOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "Do not support alltoall for cpu kernel now.")); } }; diff --git a/paddle/fluid/operators/collective/barrier_op.cu.cc 
b/paddle/fluid/operators/collective/barrier_op.cu.cc index 04d409e82b4d5..dc6b701afee00 100644 --- a/paddle/fluid/operators/collective/barrier_op.cu.cc +++ b/paddle/fluid/operators/collective/barrier_op.cu.cc @@ -47,7 +47,7 @@ class BarrierOpCUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -58,7 +58,7 @@ class BarrierOpCUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); auto stream = comm_ctx->GetStream(); @@ -82,8 +82,8 @@ class BarrierOpCUDAKernel : public framework::OpKernel { VLOG(3) << "old NCCLCommContext has rid " << rid; } #else - PADDLE_THROW(platform::errors::Unavailable( - "PaddlePaddle should compile with NCCL.")); + PADDLE_THROW( + phi::errors::Unavailable("PaddlePaddle should compile with NCCL.")); #endif } }; diff --git a/paddle/fluid/operators/collective/barrier_op.h b/paddle/fluid/operators/collective/barrier_op.h index b05f2de53a073..6bbd5c38a2f76 100644 --- a/paddle/fluid/operators/collective/barrier_op.h +++ b/paddle/fluid/operators/collective/barrier_op.h @@ -41,12 +41,12 @@ class BarrierOpCPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( gloo->IsInitialized(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "You must initialize the gloo environment first to use it.")); gloo::BarrierOptions opts(gloo->GetContext()); gloo::barrier(opts); #else - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "PaddlePaddle should compile with GLOO by setting WITH_GLOO=ON")); 
#endif } diff --git a/paddle/fluid/operators/collective/c_allgather_op.cc b/paddle/fluid/operators/collective/c_allgather_op.cc index 2a0087cd8aa72..e67a2cccc16e9 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.cc +++ b/paddle/fluid/operators/collective/c_allgather_op.cc @@ -26,10 +26,10 @@ class CAllGatherOp : public framework::OperatorWithKernel { OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "AllGather"); OP_INOUT_CHECK(ctx->HasOutput("Out"), "Input", "Out", "AllGather"); int nranks = ctx->Attrs().Get("nranks"); - PADDLE_ENFORCE_GE(nranks, - 2, - platform::errors::InvalidArgument( - "The value of nranks should be >=2.")); + PADDLE_ENFORCE_GE( + nranks, + 2, + phi::errors::InvalidArgument("The value of nranks should be >=2.")); framework::DDim dim = ctx->GetInputDim("X"); // 0D use stack/unstack while others use concat/split if (dim.size() == 0) { @@ -85,4 +85,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allgather, int64_t, uint8_t, bool, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_allgather_op.cu.cc b/paddle/fluid/operators/collective/c_allgather_op.cu.cc index dcd88f4a311ee..7b57e7af25f9b 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allgather_op.cu.cc @@ -67,7 +67,7 @@ class CAllGatherOpCUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -78,7 +78,7 @@ class CAllGatherOpCUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -88,7 +88,7 @@ class CAllGatherOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( nranks, comm->nranks(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "nranks: %s should equal to %s", nranks, comm->nranks())); stream = comm->stream(); VLOG(3) << "old NCCLCommContext has rid " << rid; @@ -112,7 +112,7 @@ class CAllGatherOpCUDAKernel : public framework::OpKernel { } #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU.")); #endif } @@ -138,5 +138,5 @@ PD_REGISTER_STRUCT_KERNEL(c_allgather, int8_t, int64_t, bool, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/c_allgather_op.h b/paddle/fluid/operators/collective/c_allgather_op.h index b4aff2c2363ec..c5e2088da9889 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.h +++ b/paddle/fluid/operators/collective/c_allgather_op.h @@ -49,14 +49,14 @@ class CAllGatherOpCPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( gloo->IsInitialized(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "You must initialize the gloo environment first to use it.")); gloo::AllgatherOptions opts(gloo->GetContext()); opts.setInput(const_cast(send_buff), send_numel); opts.setOutput(recv_buff, send_numel * nranks); gloo::allgather(opts); #else - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "PaddlePaddle should compile with GLOO by setting WITH_GLOO=ON")); #endif } diff --git a/paddle/fluid/operators/collective/c_allgather_op_xpu.cc 
b/paddle/fluid/operators/collective/c_allgather_op_xpu.cc index d31c120cf9ede..48e965894a294 100644 --- a/paddle/fluid/operators/collective/c_allgather_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_allgather_op_xpu.cc @@ -65,7 +65,7 @@ class CAllGatherOpXPUKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -76,7 +76,7 @@ class CAllGatherOpXPUKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "BKCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -86,7 +86,7 @@ class CAllGatherOpXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( nranks, comm->nranks(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "nranks: %s should equal to %s", nranks, comm->nranks())); stream = comm->stream(); VLOG(3) << "old BKCLCommContext has rid " << rid; @@ -106,7 +106,7 @@ class CAllGatherOpXPUKernel : public framework::OpKernel { comm->comm(), sendbuff, numel, recvbuff, dtype, stream)); } #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should be compiled with XPU and bkcl.")); #endif } @@ -124,7 +124,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allgather, ops::CAllGatherOpXPUKernel, float, double, - plat::float16, + phi::dtype::float16, int, int64_t, uint8_t, diff --git a/paddle/fluid/operators/collective/c_allreduce_avg_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_avg_op.cu.cc index d3f0b45f64432..e859145df8b73 100644 --- a/paddle/fluid/operators/collective/c_allreduce_avg_op.cu.cc +++ 
b/paddle/fluid/operators/collective/c_allreduce_avg_op.cu.cc @@ -31,5 +31,5 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_avg, double, int, int64_t, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op.cc b/paddle/fluid/operators/collective/c_allreduce_max_op.cc index c47bf7025e1fd..d659be0f3d141 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op.cc @@ -55,4 +55,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_max, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc index 277988b56916f..012b280a9ab15 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc @@ -34,5 +34,5 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_max, double, int, int64_t, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op_xpu.cc b/paddle/fluid/operators/collective/c_allreduce_max_op_xpu.cc index 8c648b4ae4a37..943df02ad93e2 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op_xpu.cc @@ -28,4 +28,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_max, ops::CAllReduceMaxXPUKernel, float, int, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_min_op.cc b/paddle/fluid/operators/collective/c_allreduce_min_op.cc index c21337a27202e..2a9dd023cf162 100644 --- a/paddle/fluid/operators/collective/c_allreduce_min_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_min_op.cc @@ -56,4 +56,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_min, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git 
a/paddle/fluid/operators/collective/c_allreduce_min_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_min_op.cu.cc index 4475abdef281b..a3eec10051c52 100644 --- a/paddle/fluid/operators/collective/c_allreduce_min_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_min_op.cu.cc @@ -31,4 +31,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_min, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_min_op_xpu.cc b/paddle/fluid/operators/collective/c_allreduce_min_op_xpu.cc index f9be16781af70..fb19a2924d1eb 100644 --- a/paddle/fluid/operators/collective/c_allreduce_min_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_min_op_xpu.cc @@ -28,4 +28,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_min, ops::CAllReduceMinXPUKernel, float, int, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_op.h b/paddle/fluid/operators/collective/c_allreduce_op.h index 55ca03c0bc626..db9d6d5361462 100644 --- a/paddle/fluid/operators/collective/c_allreduce_op.h +++ b/paddle/fluid/operators/collective/c_allreduce_op.h @@ -96,7 +96,7 @@ class CAllReduceOpCPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( gloo->IsInitialized(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "You must initialize the gloo environment first to use it.")); gloo::AllreduceOptions opts(gloo->GetContext()); opts.setInput(const_cast(send_buff), send_numel); @@ -123,14 +123,14 @@ class CAllReduceOpCPUKernel : public framework::OpKernel { &gloo::product)); break; default: - PADDLE_ENFORCE_EQ(true, - false, - platform::errors::InvalidArgument( - "Invalid reduce type: %d.", red_type)); + PADDLE_ENFORCE_EQ( + true, + false, + phi::errors::InvalidArgument("Invalid reduce type: %d.", red_type)); } gloo::allreduce(opts); #else - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "PaddlePaddle should compile 
with GLOO by setting WITH_GLOO=ON")); #endif } @@ -150,11 +150,11 @@ class CAllReduceOpXPUKernel : public framework::OpKernel { auto place = cond->place(); PADDLE_ENFORCE_EQ(platform::is_cpu_place(place), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The input `cond` tensor should be on cpu place")); PADDLE_ENFORCE_EQ(cond->numel(), 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The input `cond` should be shape [1]")); if (!cond->data()[0]) { VLOG(4) << "Skip all reduce Op since cond is 0"; @@ -197,8 +197,8 @@ class CAllReduceOpXPUKernel : public framework::OpKernel { break; default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Invalid reduce type: %d", red_type)); + PADDLE_THROW(phi::errors::InvalidArgument("Invalid reduce type: %d", + red_type)); } auto task = pg->AllReduce(out, *in, opts, false, true); @@ -215,7 +215,7 @@ class CAllReduceOpXPUKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -226,7 +226,7 @@ class CAllReduceOpXPUKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "BKCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -262,8 +262,8 @@ class CAllReduceOpXPUKernel : public framework::OpKernel { break; default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Invalid reduce type: %d", red_type)); + PADDLE_THROW( + phi::errors::InvalidArgument("Invalid reduce type: %d", red_type)); } if (comm_ctx) { @@ -278,7 +278,7 @@ class CAllReduceOpXPUKernel : public framework::OpKernel { stream)); } #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should be compiled with XPU.")); #endif } @@ -297,11 +297,11 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel { auto place = cond->place(); PADDLE_ENFORCE_EQ(platform::is_cpu_place(place), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The input `cond` tensor should be on cpu place")); PADDLE_ENFORCE_EQ(cond->numel(), 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The input `cond` should be shape [1]")); if (!cond->data()[0]) { VLOG(4) << "Skip all reduce Op since cond is 0"; @@ -345,8 +345,8 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel { break; default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Invalid reduce type: %d", red_type)); + PADDLE_THROW(phi::errors::InvalidArgument("Invalid reduce type: %d", + red_type)); } auto task = pg->AllReduce(out, *in, opts, false, true); @@ -363,7 +363,7 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose 
to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -374,7 +374,7 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -420,8 +420,8 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel { #endif default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Invalid reduce type: %d", red_type)); + PADDLE_THROW( + phi::errors::InvalidArgument("Invalid reduce type: %d", red_type)); } if (comm_ctx) { @@ -436,7 +436,7 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel { stream)); } #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU.")); #endif } diff --git a/paddle/fluid/operators/collective/c_allreduce_prod_op.cc b/paddle/fluid/operators/collective/c_allreduce_prod_op.cc index ee40f29d789e1..181b78b545e7c 100644 --- a/paddle/fluid/operators/collective/c_allreduce_prod_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_prod_op.cc @@ -56,4 +56,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_prod, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_prod_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_prod_op.cu.cc index c63a1d2182678..e2c0a71a9ced4 100644 --- a/paddle/fluid/operators/collective/c_allreduce_prod_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_prod_op.cu.cc @@ -31,4 +31,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_prod, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_prod_op_xpu.cc b/paddle/fluid/operators/collective/c_allreduce_prod_op_xpu.cc index 
5558b1722093a..d3696c2c5dfc1 100644 --- a/paddle/fluid/operators/collective/c_allreduce_prod_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_prod_op_xpu.cc @@ -28,4 +28,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_prod, ops::CAllReduceProdXPUKernel, float, int, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_sum_op.cc b/paddle/fluid/operators/collective/c_allreduce_sum_op.cc index 79e70757fbcfd..80b97b2bc70cb 100644 --- a/paddle/fluid/operators/collective/c_allreduce_sum_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_sum_op.cc @@ -77,4 +77,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_sum, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc index 76d809cd234f0..909bd23db2413 100644 --- a/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc @@ -34,5 +34,5 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_sum, double, int, int64_t, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/c_allreduce_sum_op_xpu.cc b/paddle/fluid/operators/collective/c_allreduce_sum_op_xpu.cc index 1d4c5f63b5850..21bedcff8774b 100644 --- a/paddle/fluid/operators/collective/c_allreduce_sum_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_sum_op_xpu.cc @@ -28,4 +28,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_sum, ops::CAllReduceSumXPUKernel, float, int, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_broadcast_op.cc b/paddle/fluid/operators/collective/c_broadcast_op.cc index 670b69c05701c..27f3a1bcdc29f 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op.cc @@ -73,4 +73,4 @@ PD_REGISTER_STRUCT_KERNEL(c_broadcast, double, int, int64_t, - 
plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc index 4d49bc4990c6e..98f9102f2d8f0 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc @@ -80,7 +80,7 @@ class CBroadcastOpCUDAKernel : public framework::OpKernel { out->set_lod(x->lod()); #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU.")); #endif } @@ -103,5 +103,5 @@ PD_REGISTER_STRUCT_KERNEL(c_broadcast, #if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/c_broadcast_op.h b/paddle/fluid/operators/collective/c_broadcast_op.h index e0d6158f19db7..c02b8f8a9a4fe 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op.h +++ b/paddle/fluid/operators/collective/c_broadcast_op.h @@ -59,7 +59,7 @@ class CBroadcastOpCPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( gloo->IsInitialized(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "You must initialize the gloo environment first to use it.")); gloo::BroadcastOptions opts(gloo->GetContext()); opts.setOutput(recv_buff, send_numel); @@ -67,7 +67,7 @@ class CBroadcastOpCPUKernel : public framework::OpKernel { gloo::broadcast(opts); } #else - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "PaddlePaddle should compile with GLOO by setting WITH_GLOO=ON")); #endif } diff --git a/paddle/fluid/operators/collective/c_broadcast_op_xpu.cc b/paddle/fluid/operators/collective/c_broadcast_op_xpu.cc index 6bf9d956a342e..ac7d9623e3241 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op_xpu.cc @@ -50,7 +50,7 @@ class 
CBroadcastOpXPUKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -61,7 +61,7 @@ class CBroadcastOpXPUKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(ring_id))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "BKCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -119,7 +119,7 @@ class CBroadcastOpXPUKernel : public framework::OpKernel { out->Resize(x->dims()); out->set_lod(x->lod()); #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should be compiled with XPU and BKCL.")); #endif } @@ -137,6 +137,6 @@ PD_REGISTER_STRUCT_KERNEL(c_broadcast, ops::CBroadcastOpXPUKernel, float, double, - plat::float16, + phi::dtype::float16, int, int64_t) {} diff --git a/paddle/fluid/operators/collective/c_comm_init_multitrainer_op.cc b/paddle/fluid/operators/collective/c_comm_init_multitrainer_op.cc index 14059c3d91027..ca0a45c8ae79c 100644 --- a/paddle/fluid/operators/collective/c_comm_init_multitrainer_op.cc +++ b/paddle/fluid/operators/collective/c_comm_init_multitrainer_op.cc @@ -54,7 +54,7 @@ class CCommInitMultiTrainerOp : public framework::OperatorBase { const platform::Place& place) const override { auto var = scope.FindVar(Input("X")); PADDLE_ENFORCE_NOT_NULL( - var, platform::errors::InvalidArgument("Input X must be provided.")); + var, phi::errors::InvalidArgument("Input X must be provided.")); #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) ncclUniqueId* nccl_id = var->GetMutable(); @@ -70,8 +70,8 @@ class CCommInitMultiTrainerOp : public framework::OperatorBase 
{ platform::NCCLCommContext::Instance().CreateNCCLCommMultiTrainer( devices, nccl_id, ntrainers, train_id, rid); #else - PADDLE_THROW(platform::errors::Unimplemented( - "PaddlePaddle should compile with GPU.")); + PADDLE_THROW( + phi::errors::Unimplemented("PaddlePaddle should compile with GPU.")); #endif } }; diff --git a/paddle/fluid/operators/collective/c_comm_init_op.cc b/paddle/fluid/operators/collective/c_comm_init_op.cc index 172e330675033..5c6613a0e9ca3 100644 --- a/paddle/fluid/operators/collective/c_comm_init_op.cc +++ b/paddle/fluid/operators/collective/c_comm_init_op.cc @@ -65,7 +65,7 @@ class CCommInitOp : public framework::OperatorBase { #if defined(PADDLE_WITH_CUSTOM_DEVICE) auto var = scope.FindVar(Input("X")); PADDLE_ENFORCE_NOT_NULL( - var, platform::errors::InvalidArgument("Input con not be empty.")); + var, phi::errors::InvalidArgument("Input con not be empty.")); int nranks = Attr("nranks"); int rid = Attr("ring_id"); @@ -87,7 +87,7 @@ class CCommInitOp : public framework::OperatorBase { "c_comm_init_op"); } #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with custom device.")); #endif } else { @@ -99,21 +99,21 @@ class CCommInitOp : public framework::OperatorBase { using UniqueId = BKCLUniqueId; using CommContext = platform::BKCLCommContext; #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should be compiled with GPU or XPU.")); #endif PADDLE_ENFORCE_EQ( platform::is_gpu_place(place) || platform::is_xpu_place(place), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "CCommInitOp can run on gpu or xpu place only.")); #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \ defined(PADDLE_WITH_XPU_BKCL) auto var = scope.FindVar(Input("X")); PADDLE_ENFORCE_NOT_NULL( - var, platform::errors::InvalidArgument("Input con not be empty.")); + var, 
phi::errors::InvalidArgument("Input con not be empty.")); int nranks = Attr("nranks"); int rid = Attr("ring_id"); diff --git a/paddle/fluid/operators/collective/c_concat_op.cc b/paddle/fluid/operators/collective/c_concat_op.cc index 27c1141f8b67f..75db7e9fad427 100644 --- a/paddle/fluid/operators/collective/c_concat_op.cc +++ b/paddle/fluid/operators/collective/c_concat_op.cc @@ -27,29 +27,29 @@ class CConcatOp : public framework::OperatorWithKernel { int nranks = ctx->Attrs().Get("nranks"); int rank = ctx->Attrs().Get("rank"); int ring_id = ctx->Attrs().Get("ring_id"); - PADDLE_ENFORCE_GE(nranks, - 2, - platform::errors::InvalidArgument( - "The number of ranks (%d) for c_concat " - "must be greater than 1.", - nranks)); + PADDLE_ENFORCE_GE( + nranks, + 2, + phi::errors::InvalidArgument("The number of ranks (%d) for c_concat " + "must be greater than 1.", + nranks)); PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for c_concat must be non-negative.", ring_id)); PADDLE_ENFORCE_GE( rank, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank (%d) for c_concat must be non-negative.", rank)); - PADDLE_ENFORCE_LT(rank, - nranks, - platform::errors::InvalidArgument( - "The value of rank (%d) for c_concat must " - "be less than that of nranks.", - rank, - nranks)); + PADDLE_ENFORCE_LT( + rank, + nranks, + phi::errors::InvalidArgument("The value of rank (%d) for c_concat must " + "be less than that of nranks.", + rank, + nranks)); framework::DDim dim = ctx->GetInputDim("X"); dim[dim.size() - 1] = dim[dim.size() - 1] * nranks; @@ -121,4 +121,4 @@ PD_REGISTER_STRUCT_KERNEL(c_concat, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_concat_op.cu.cc b/paddle/fluid/operators/collective/c_concat_op.cu.cc index b75b2d4b0f687..9ed68c7c6809b 100644 --- a/paddle/fluid/operators/collective/c_concat_op.cu.cc +++ 
b/paddle/fluid/operators/collective/c_concat_op.cu.cc @@ -47,19 +47,19 @@ class CConcatOpCUDAKernel : public framework::OpKernel { auto place = ctx.GetPlace(); PADDLE_ENFORCE_GE(rank, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of rank (%d) for c_concat must be " "greater than or equal to 0.", rank)); PADDLE_ENFORCE_GE(nranks, 2, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of nranks (%d) for c_concat must be " "greater than or equal to 2.", nranks)); PADDLE_ENFORCE_LT(rank, nranks, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of rank (%d) for c_concat must be " "less than that of nranks (%d).", rank, @@ -95,7 +95,7 @@ class CConcatOpCUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm " @@ -107,7 +107,7 @@ class CConcatOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -117,7 +117,7 @@ class CConcatOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( nranks, comm->nranks(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "nranks: %s should equal to %s", nranks, comm->nranks())); stream = comm->stream(); VLOG(3) << "old NCCLCommContext has rid " << rid; @@ -156,7 +156,7 @@ class CConcatOpCUDAKernel : public framework::OpKernel { auto& dev_ctx2 = ctx.template device_context(); functor(dev_ctx2, inputs, axis, out); #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( 
"PaddlePaddle should compile with GPU.")); #endif } @@ -178,5 +178,5 @@ PD_REGISTER_STRUCT_KERNEL(c_concat, #if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/c_concat_op.h b/paddle/fluid/operators/collective/c_concat_op.h index 39bdc4c2740de..84edccffc6fa3 100644 --- a/paddle/fluid/operators/collective/c_concat_op.h +++ b/paddle/fluid/operators/collective/c_concat_op.h @@ -29,7 +29,7 @@ template class CConcatOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "Do not support c_concat for cpu kernel now.")); } }; diff --git a/paddle/fluid/operators/collective/c_concat_op_xpu.cc b/paddle/fluid/operators/collective/c_concat_op_xpu.cc index 10a2624ae83a4..c8361965de3e4 100644 --- a/paddle/fluid/operators/collective/c_concat_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_concat_op_xpu.cc @@ -46,19 +46,19 @@ class CConcatOpXPUKernel : public framework::OpKernel { int rid = ctx.Attr("ring_id"); PADDLE_ENFORCE_GE(rank, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of rank (%d) for c_concat must be " "greater than or equal to 0.", rank)); PADDLE_ENFORCE_GE(nranks, 2, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of nranks (%d) for c_concat must be " "greater than or equal to 2.", nranks)); PADDLE_ENFORCE_LT(rank, nranks, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of rank (%d) for c_concat must be " "less than that of nranks (%d).", rank, @@ -95,7 +95,7 @@ class CConcatOpXPUKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + 
phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm " @@ -107,7 +107,7 @@ class CConcatOpXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "BKCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -118,7 +118,7 @@ class CConcatOpXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( nranks, comm->nranks(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "nranks: %s should equal to %s", nranks, comm->nranks())); stream = comm->stream(); VLOG(3) << "old BKCLCommContext has rid " << rid; @@ -151,7 +151,7 @@ class CConcatOpXPUKernel : public framework::OpKernel { dev_ctx.template Alloc(out, x->dtype()); functor(dev_ctx, inputs, axis, out); #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with XPU.")); #endif } @@ -169,4 +169,4 @@ PD_REGISTER_STRUCT_KERNEL(c_concat, float, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_embedding_op.cc b/paddle/fluid/operators/collective/c_embedding_op.cc index 86e882b1c6cc8..0bbd64abb10d5 100644 --- a/paddle/fluid/operators/collective/c_embedding_op.cc +++ b/paddle/fluid/operators/collective/c_embedding_op.cc @@ -33,7 +33,7 @@ class CEmbeddingOp : public framework::OperatorWithKernel { VLOG(5) << "ids rank is " << ids_rank << std::endl; PADDLE_ENFORCE_EQ(table_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of the 'c_embedding' must be 2. 
" "But received c_embedding's dimensions = %d, " "c_embedding's shape = [%s].", @@ -57,7 +57,7 @@ class CEmbeddingOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( (height > 0 && width > 0 && start_idx >= 0), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "height:%ld width:%ld start_idx:%ld must not have negative values", height, width, @@ -133,10 +133,10 @@ class CEmbeddingOpGrad : public framework::OperatorWithKernel { ctx->SetOutputDim(framework::GradVarName("W"), table_dims); // check valid - PADDLE_ENFORCE_EQ(table_dims.size(), - 2, - platform::errors::InvalidArgument( - "Only accept the dims of table_t == 2")); + PADDLE_ENFORCE_EQ( + table_dims.size(), + 2, + phi::errors::InvalidArgument("Only accept the dims of table_t == 2")); const int64_t start_idx = ctx->Attrs().Get("start_index"); const int64_t height = table_dims[0]; @@ -145,7 +145,7 @@ class CEmbeddingOpGrad : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( (height > 0 && width > 0 && start_idx >= 0), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "height:%ld width:%ld start_idx:%ld must not have negative values", height, width, diff --git a/paddle/fluid/operators/collective/c_gen_bkcl_id_op.cc b/paddle/fluid/operators/collective/c_gen_bkcl_id_op.cc index a7a234f5792ef..3d469b81609f8 100644 --- a/paddle/fluid/operators/collective/c_gen_bkcl_id_op.cc +++ b/paddle/fluid/operators/collective/c_gen_bkcl_id_op.cc @@ -31,10 +31,10 @@ namespace operators { static void GenBKCLID(std::vector* bkcl_ids) { for (size_t i = 0; i < bkcl_ids->size(); ++i) { BKCLResult_t ret = bkcl_get_unique_id(&(*bkcl_ids)[i]); - PADDLE_ENFORCE_EQ(BKCL_SUCCESS, - ret, - platform::errors::PreconditionNotMet( - "bkcl get unique id failed [%d]", ret)); + PADDLE_ENFORCE_EQ( + BKCL_SUCCESS, + ret, + phi::errors::PreconditionNotMet("bkcl get unique id failed [%d]", ret)); } } @@ -46,8 +46,8 @@ static void CopyBKCLIDToVar(const std::vector& bkcl_ids, auto var = 
scope.FindVar(var_name); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound("Variable with name %s is not found", - var_name.c_str())); + phi::errors::NotFound("Variable with name %s is not found", + var_name.c_str())); auto bkcl_id = var->GetMutable(); memcpy(bkcl_id, &bkcl_ids[i], sizeof(BKCLUniqueId)); } diff --git a/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc b/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc index c66aedd3b3923..f7f92a0a574df 100644 --- a/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc +++ b/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc @@ -42,8 +42,8 @@ static void CopyNCCLIDToVar(const std::vector& nccl_ids, auto var = scope.FindVar(var_name); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound("Variable with name %s is not found", - var_name.c_str())); + phi::errors::NotFound("Variable with name %s is not found", + var_name.c_str())); auto nccl_id = var->GetMutable(); memcpy(nccl_id, &nccl_ids[i], sizeof(ncclUniqueId)); } diff --git a/paddle/fluid/operators/collective/c_gen_xccl_id_op.cc b/paddle/fluid/operators/collective/c_gen_xccl_id_op.cc index e404a1357ee75..c24fb4964b336 100644 --- a/paddle/fluid/operators/collective/c_gen_xccl_id_op.cc +++ b/paddle/fluid/operators/collective/c_gen_xccl_id_op.cc @@ -36,8 +36,8 @@ static void CopyXCCLIDToVar(const std::vector& xccl_ids, auto var = scope.FindVar(var_name); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound("Variable with name %s is not found", - var_name.c_str())); + phi::errors::NotFound("Variable with name %s is not found", + var_name.c_str())); auto xccl_id = var->GetMutable(); *xccl_id = xccl_ids[i]; } diff --git a/paddle/fluid/operators/collective/c_identity_op.cc b/paddle/fluid/operators/collective/c_identity_op.cc index c067c061b8613..78d4a27f822b4 100644 --- a/paddle/fluid/operators/collective/c_identity_op.cc +++ b/paddle/fluid/operators/collective/c_identity_op.cc @@ -31,7 +31,7 @@ class CIdentityOp : public 
framework::OperatorWithKernel { PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for c_identity must be non-negative.", ring_id)); framework::DDim dim = ctx->GetInputDim("X"); ctx->SetOutputDim("Out", dim); diff --git a/paddle/fluid/operators/collective/c_reduce_avg_op.cu.cc b/paddle/fluid/operators/collective/c_reduce_avg_op.cu.cc index 07d2cc748900e..1dcd5a2c6489c 100644 --- a/paddle/fluid/operators/collective/c_reduce_avg_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reduce_avg_op.cu.cc @@ -31,5 +31,5 @@ PD_REGISTER_STRUCT_KERNEL(c_reduce_avg, double, int, int64_t, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} diff --git a/paddle/fluid/operators/collective/c_reduce_max_op.cc b/paddle/fluid/operators/collective/c_reduce_max_op.cc index a1509a89eb3b3..a0181c9f0e7af 100644 --- a/paddle/fluid/operators/collective/c_reduce_max_op.cc +++ b/paddle/fluid/operators/collective/c_reduce_max_op.cc @@ -53,4 +53,4 @@ PD_REGISTER_STRUCT_KERNEL(c_reduce_max, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_reduce_max_op.cu.cc b/paddle/fluid/operators/collective/c_reduce_max_op.cu.cc index 8973de0a19675..24f3dffd0517e 100644 --- a/paddle/fluid/operators/collective/c_reduce_max_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reduce_max_op.cu.cc @@ -31,4 +31,4 @@ PD_REGISTER_STRUCT_KERNEL(c_reduce_max, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_reduce_min_op.cc b/paddle/fluid/operators/collective/c_reduce_min_op.cc index 9b53d80e01607..621272895fe4c 100644 --- a/paddle/fluid/operators/collective/c_reduce_min_op.cc +++ b/paddle/fluid/operators/collective/c_reduce_min_op.cc @@ -52,4 +52,4 @@ PD_REGISTER_STRUCT_KERNEL(c_reduce_min, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git 
a/paddle/fluid/operators/collective/c_reduce_min_op.cu.cc b/paddle/fluid/operators/collective/c_reduce_min_op.cu.cc index e3239cb812cd9..c7d979bd932b6 100644 --- a/paddle/fluid/operators/collective/c_reduce_min_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reduce_min_op.cu.cc @@ -31,4 +31,4 @@ PD_REGISTER_STRUCT_KERNEL(c_reduce_min, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_reduce_op.h b/paddle/fluid/operators/collective/c_reduce_op.h index d90fb88fe8f3f..0ea4187ffc4f2 100644 --- a/paddle/fluid/operators/collective/c_reduce_op.h +++ b/paddle/fluid/operators/collective/c_reduce_op.h @@ -85,7 +85,7 @@ class CReduceOpCPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( gloo->IsInitialized(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "You must initialize the gloo environment first to use it.")); gloo::ReduceOptions opts(gloo->GetContext()); opts.setInput(const_cast(send_buff), send_numel); @@ -113,14 +113,14 @@ class CReduceOpCPUKernel : public framework::OpKernel { &gloo::product)); break; default: - PADDLE_ENFORCE_EQ(true, - false, - platform::errors::InvalidArgument( - "Invalid reduce type: %d.", red_type)); + PADDLE_ENFORCE_EQ( + true, + false, + phi::errors::InvalidArgument("Invalid reduce type: %d.", red_type)); } gloo::reduce(opts); #else - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "PaddlePaddle should compile with GLOO by setting WITH_GLOO=ON")); #endif } @@ -158,7 +158,7 @@ class CReduceOpXPUKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -169,7 +169,7 @@ class CReduceOpXPUKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "BKCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -205,8 +205,8 @@ class CReduceOpXPUKernel : public framework::OpKernel { break; default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Invalid reduce type: %d", red_type)); + PADDLE_THROW( + phi::errors::InvalidArgument("Invalid reduce type: %d", red_type)); } if (comm_ctx) { @@ -222,7 +222,7 @@ class CReduceOpXPUKernel : public framework::OpKernel { stream)); } #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should be compiled with XPU.")); #endif } @@ -260,7 +260,7 @@ class CReduceOpCUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -271,7 +271,7 @@ class CReduceOpCUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -311,11 +311,11 @@ class CReduceOpCUDAKernel : public framework::OpKernel { #endif default: - PADDLE_ENFORCE_EQ(true, - false, - platform::errors::InvalidArgument( - "red_type must be one of kRedSum, " - "kRedMax, kRedMin, kRedProd.")); + PADDLE_ENFORCE_EQ( + true, + false, + phi::errors::InvalidArgument("red_type must be one of kRedSum, " + "kRedMax, kRedMin, kRedProd.")); } if (comm_ctx) { @@ -331,10 +331,10 @@ class CReduceOpCUDAKernel : public framework::OpKernel { stream)); } #else - PADDLE_ENFORCE_EQ(true, - false, - platform::errors::Unavailable( - "PaddlePaddle should compile with GPU..")); + PADDLE_ENFORCE_EQ( + true, + false, + phi::errors::Unavailable("PaddlePaddle should compile with GPU..")); #endif } }; diff --git a/paddle/fluid/operators/collective/c_reduce_prod_op.cc b/paddle/fluid/operators/collective/c_reduce_prod_op.cc index 20dacd19b382b..c34e799f5d8e1 100644 --- a/paddle/fluid/operators/collective/c_reduce_prod_op.cc +++ b/paddle/fluid/operators/collective/c_reduce_prod_op.cc @@ -53,4 +53,4 @@ PD_REGISTER_STRUCT_KERNEL(c_reduce_prod, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_reduce_prod_op.cu.cc b/paddle/fluid/operators/collective/c_reduce_prod_op.cu.cc index 675c274eb0638..b8b562031bc4e 100644 --- a/paddle/fluid/operators/collective/c_reduce_prod_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reduce_prod_op.cu.cc @@ -31,4 +31,4 @@ PD_REGISTER_STRUCT_KERNEL(c_reduce_prod, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_reduce_sum_op.cc 
b/paddle/fluid/operators/collective/c_reduce_sum_op.cc index 72be5c391fca2..5bf5c1c2f8b9f 100644 --- a/paddle/fluid/operators/collective/c_reduce_sum_op.cc +++ b/paddle/fluid/operators/collective/c_reduce_sum_op.cc @@ -53,4 +53,4 @@ PD_REGISTER_STRUCT_KERNEL(c_reduce_sum, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_reduce_sum_op.cu.cc b/paddle/fluid/operators/collective/c_reduce_sum_op.cu.cc index dfae966a35eb0..56fd0e1293389 100644 --- a/paddle/fluid/operators/collective/c_reduce_sum_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reduce_sum_op.cu.cc @@ -31,5 +31,5 @@ PD_REGISTER_STRUCT_KERNEL(c_reduce_sum, double, int, int64_t, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} diff --git a/paddle/fluid/operators/collective/c_reducescatter_op.cc b/paddle/fluid/operators/collective/c_reducescatter_op.cc index 11c0094340f08..7726c3bf5ca41 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op.cc @@ -32,7 +32,7 @@ class CReduceScatterOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( dim[0] % nranks, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "dim[0] (%d) is not divisible by nranks(%d)", dim[0], nranks)); dim[0] /= nranks; } @@ -81,4 +81,4 @@ PD_REGISTER_STRUCT_KERNEL(c_reducescatter, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc index 617a8f7b7f941..e00433ad7b4d6 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc @@ -46,7 +46,7 @@ class CReduceScatterOpCUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - 
platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -57,12 +57,12 @@ class CReduceScatterOpCUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); PADDLE_ENFORCE_EQ(out_dims[0] % comm_ctx->GetSize(), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor X's " "dim[0] (%d) should be divisible by nranks(%d)", out_dims[0], @@ -74,7 +74,7 @@ class CReduceScatterOpCUDAKernel : public framework::OpKernel { comm = platform::NCCLCommContext::Instance().Get(rid, place); PADDLE_ENFORCE_EQ(out_dims[0] % comm->nranks(), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor X's " "dim[0] (%d) should be divisible by nranks(%d)", out_dims[0], @@ -90,7 +90,7 @@ class CReduceScatterOpCUDAKernel : public framework::OpKernel { int nranks = comm_ctx ? 
comm_ctx->GetSize() : comm->nranks(); PADDLE_ENFORCE_EQ(out_dims[0] % nranks, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor X's " "dim[0] (%d) should be divisible by nranks(%d)", out_dims[0], @@ -117,7 +117,7 @@ class CReduceScatterOpCUDAKernel : public framework::OpKernel { stream)); } #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU.")); #endif } @@ -140,5 +140,5 @@ PD_REGISTER_STRUCT_KERNEL(c_reducescatter, #endif int, int64_t, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/c_reducescatter_op.h b/paddle/fluid/operators/collective/c_reducescatter_op.h index 52af0b9c43541..9f978f3f94bf3 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op.h +++ b/paddle/fluid/operators/collective/c_reducescatter_op.h @@ -31,7 +31,7 @@ template class CReduceScatterOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unimplemented cpu kernel for CReduceScatterOp.")); } }; diff --git a/paddle/fluid/operators/collective/c_scatter_op.cc b/paddle/fluid/operators/collective/c_scatter_op.cc index 40b6eeacf8030..d3caf13485036 100644 --- a/paddle/fluid/operators/collective/c_scatter_op.cc +++ b/paddle/fluid/operators/collective/c_scatter_op.cc @@ -29,20 +29,20 @@ class CScatterOp : public framework::OperatorWithKernel { int nranks = ctx->Attrs().Get("nranks"); PADDLE_ENFORCE_GE(nranks, 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of ranks (%d) must be greater than 1 " "to use collective op (c_scatter op).", nranks)); PADDLE_ENFORCE_GE( root_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The root_id (%d) for c_scatter_op must be non-negative.", root_id)); 
PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for c_scatter_op must be non-negative.", root_id)); framework::DDim dim = ctx->GetInputDim("X"); @@ -96,4 +96,4 @@ PD_REGISTER_STRUCT_KERNEL(c_scatter, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_scatter_op.cu.cc b/paddle/fluid/operators/collective/c_scatter_op.cu.cc index fc7a83ca638ee..7cfe5b6785b5a 100644 --- a/paddle/fluid/operators/collective/c_scatter_op.cu.cc +++ b/paddle/fluid/operators/collective/c_scatter_op.cu.cc @@ -47,13 +47,13 @@ class CScatterOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( root_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The root_id (%d) for c_scatter_op must be non-negative.", root_id)); PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for c_scatter_op must be non-negative.", ring_id)); @@ -62,7 +62,7 @@ class CScatterOpCUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -73,12 +73,12 @@ class CScatterOpCUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(ring_id))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); PADDLE_ENFORCE_EQ(nranks, comm_ctx->GetSize(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of ranks (%d) you set of must " "be equal to comm_ctx->GetSize() (%d).", nranks, @@ -90,7 +90,7 @@ class CScatterOpCUDAKernel : public framework::OpKernel { comm = platform::NCCLCommContext::Instance().Get(ring_id, place); PADDLE_ENFORCE_EQ(nranks, comm->nranks(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of ranks (%d) you set of must " "be equal to comm->nranks (%d).", nranks, @@ -158,7 +158,7 @@ class CScatterOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( true, false, - platform::errors::Unavailable("PaddlePaddle should compile with GPU.")); + phi::errors::Unavailable("PaddlePaddle should compile with GPU.")); #endif } }; @@ -177,4 +177,4 @@ PD_REGISTER_STRUCT_KERNEL(c_scatter, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_scatter_op.h b/paddle/fluid/operators/collective/c_scatter_op.h index 76f3350a64c05..164b7f156de0a 100644 --- a/paddle/fluid/operators/collective/c_scatter_op.h +++ b/paddle/fluid/operators/collective/c_scatter_op.h @@ -44,7 +44,7 @@ class CScatterOpCPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( gloo->IsInitialized(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "You must initialize the gloo environment first to use it.")); int64_t send_numel = out->numel(); @@ -66,7 +66,7 @@ class CScatterOpCPUKernel : public framework::OpKernel { gloo::scatter(opts); #else - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( 
"PaddlePaddle should compile with GLOO by setting WITH_GLOO=ON")); #endif } diff --git a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cc b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cc index e4de0ceb136c1..496733759adb3 100644 --- a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cc +++ b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cc @@ -46,7 +46,7 @@ class CSoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel { if (ctx->IsRuntime() || (logits_dims[i] > 0 && labels_dims[i] > 0)) { PADDLE_ENFORCE_EQ(logits_dims[i], labels_dims[i], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Logits) and Input(Label) should in " "same shape in dimensions except axis.")); } @@ -56,7 +56,7 @@ class CSoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( labels_dims[logits_rank - 1], 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "the last dimension of Input(Label) should be 1." 
"But received: the last dimension of Input(Label) is [%d]," "the last dimension is [%d]", @@ -130,22 +130,22 @@ class CSoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Loss")), - true, - platform::errors::InvalidArgument( - "Input(Loss@Grad) should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("Softmax"), - true, - platform::errors::InvalidArgument( - "Input(Softmax) should be not null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput(framework::GradVarName("Loss")), + true, + phi::errors::InvalidArgument("Input(Loss@Grad) should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("Softmax"), + true, + phi::errors::InvalidArgument("Input(Softmax) should be not null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("Label"), true, - platform::errors::InvalidArgument("Input(Label) should be not null.")); + phi::errors::InvalidArgument("Input(Label) should be not null.")); PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("Logits")), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Logits@Grad) should be not null.")); ctx->SetOutputDim(framework::GradVarName("Logits"), @@ -209,4 +209,4 @@ PD_REGISTER_STRUCT_KERNEL(c_softmax_with_cross_entropy, ops::CSoftmaxWithCrossEntropyOpCPUKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu index e65ebafad7235..80ce7ce50c4a0 100644 --- a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu +++ b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu @@ -155,7 +155,7 @@ struct CSoftmaxWithCrossEntropyFunctor { if (FLAGS_dynamic_static_unified_comm) { 
PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -166,7 +166,7 @@ struct CSoftmaxWithCrossEntropyFunctor { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); @@ -551,11 +551,11 @@ PD_REGISTER_STRUCT_KERNEL(c_softmax_with_cross_entropy, ops::CSoftmaxWithCrossEntropyOpCUDAKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} PD_REGISTER_STRUCT_KERNEL(c_softmax_with_cross_entropy_grad, GPU, ALL_LAYOUT, ops::CSoftmaxWithCrossEntropyGradCUDAKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.h b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.h index 9b6a2c86897cb..3689cbcefd9bd 100644 --- a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.h +++ b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.h @@ -33,7 +33,7 @@ template class CSoftmaxWithCrossEntropyOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "Do not support c_embedding for cpu kernel now.")); } }; diff --git a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op_xpu.cc b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op_xpu.cc index 499b25e65974b..65329ccd8b269 100644 --- a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op_xpu.cc @@ -278,7 +278,7 @@ struct 
CSoftmaxWithCrossEntropyFunctor { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -289,7 +289,7 @@ struct CSoftmaxWithCrossEntropyFunctor { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "BKCLCommContext is nullptr, collective op should " "has ring_id attr.")); diff --git a/paddle/fluid/operators/collective/c_split_op.cc b/paddle/fluid/operators/collective/c_split_op.cc index dd65b99e3b7ee..f684c6fe35cf9 100644 --- a/paddle/fluid/operators/collective/c_split_op.cc +++ b/paddle/fluid/operators/collective/c_split_op.cc @@ -27,38 +27,38 @@ class CSplitOp : public framework::OperatorWithKernel { int nranks = ctx->Attrs().Get("nranks"); int rank = ctx->Attrs().Get("rank"); int ring_id = ctx->Attrs().Get("ring_id"); - PADDLE_ENFORCE_GE(nranks, - 2, - platform::errors::InvalidArgument( - "The number of ranks (%d) for c_split " - "must be greater than 1.", - nranks)); + PADDLE_ENFORCE_GE( + nranks, + 2, + phi::errors::InvalidArgument("The number of ranks (%d) for c_split " + "must be greater than 1.", + nranks)); PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for c_split must be non-negative.", ring_id)); PADDLE_ENFORCE_GE( rank, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank (%d) for c_split must be non-negative.", rank)); - PADDLE_ENFORCE_LT(rank, - nranks, - platform::errors::InvalidArgument( - "The value of rank (%d) for c_split must " - "be less than that of nranks.", - rank, - nranks)); + PADDLE_ENFORCE_LT( + rank, + nranks, + phi::errors::InvalidArgument("The value of rank (%d) for c_split must " + "be less 
than that of nranks.", + rank, + nranks)); framework::DDim dim = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ( dim[dim.size() - 1] % nranks, 0, - platform::errors::InvalidArgument("The last dimension (%d) of the X " - "should be divisible by nranks (%d)", - dim[dim.size() - 1], - nranks)); + phi::errors::InvalidArgument("The last dimension (%d) of the X " + "should be divisible by nranks (%d)", + dim[dim.size() - 1], + nranks)); dim[dim.size() - 1] = dim[dim.size() - 1] / nranks; if (dim[0] < 0) dim[0] = -1; diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op.cu.cc b/paddle/fluid/operators/collective/c_sync_calc_stream_op.cu.cc index 526726ae3c772..8d1134be70de1 100644 --- a/paddle/fluid/operators/collective/c_sync_calc_stream_op.cu.cc +++ b/paddle/fluid/operators/collective/c_sync_calc_stream_op.cu.cc @@ -24,5 +24,5 @@ PD_REGISTER_STRUCT_KERNEL(c_sync_calc_stream, double, int, int64_t, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op.h b/paddle/fluid/operators/collective/c_sync_calc_stream_op.h index e100397924af5..a0e2d858ebd38 100644 --- a/paddle/fluid/operators/collective/c_sync_calc_stream_op.h +++ b/paddle/fluid/operators/collective/c_sync_calc_stream_op.h @@ -51,14 +51,14 @@ class CSyncCalcStreamKernel : public framework::OpKernel { auto place = ctx.GetPlace(); PADDLE_ENFORCE_EQ(platform::is_xpu_place(place), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Sync stream op can run on xpu place only for now.")); auto dev_ctx = static_cast( platform::DeviceContextPool::Instance().Get(place)); dev_ctx->Wait(); #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU.")); #endif } diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op_xpu.cc b/paddle/fluid/operators/collective/c_sync_calc_stream_op_xpu.cc index 24157f1c64a6c..1448e1e3745ec 
100644 --- a/paddle/fluid/operators/collective/c_sync_calc_stream_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_sync_calc_stream_op_xpu.cc @@ -25,4 +25,4 @@ PD_REGISTER_STRUCT_KERNEL(c_sync_calc_stream, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_sync_comm_stream_op.h b/paddle/fluid/operators/collective/c_sync_comm_stream_op.h index d5fdad8f04f86..d67ef6820d021 100644 --- a/paddle/fluid/operators/collective/c_sync_comm_stream_op.h +++ b/paddle/fluid/operators/collective/c_sync_comm_stream_op.h @@ -50,7 +50,7 @@ class CSyncCommStreamKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -73,7 +73,7 @@ class CSyncCommStreamKernel : public framework::OpKernel { auto place = ctx.GetPlace(); PADDLE_ENFORCE_EQ(platform::is_xpu_place(place), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Sync stream op can run on xpu place only for now.")); int ring_id = ctx.Attr("ring_id"); XPUStream stream = nullptr; @@ -82,7 +82,7 @@ class CSyncCommStreamKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -102,7 +102,7 @@ class CSyncCommStreamKernel : public framework::OpKernel { platform::XPUStreamSync(stream); #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU or XPU.")); #endif } diff --git a/paddle/fluid/operators/collective/c_wait_comm_op.cc b/paddle/fluid/operators/collective/c_wait_comm_op.cc index fbb9c0d1ca7ce..10f4d9726f21b 100644 --- a/paddle/fluid/operators/collective/c_wait_comm_op.cc +++ b/paddle/fluid/operators/collective/c_wait_comm_op.cc @@ -43,7 +43,7 @@ class CWaitCommOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( platform::is_gpu_place(place), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "wait_comm op can run on gpu place only for now, but got %s", place.DebugString())); @@ -62,7 +62,7 @@ class CWaitCommOp : public framework::OperatorBase { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -94,7 +94,7 @@ class CWaitCommOp : public framework::OperatorBase { PADDLE_ENFORCE_GPU_SUCCESS(cudaStreamWaitEvent(compute_stream, event, 0)); #endif #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU.")); #endif } diff --git a/paddle/fluid/operators/collective/c_wait_compute_op.cc b/paddle/fluid/operators/collective/c_wait_compute_op.cc index 040e86c46b9ec..a548998ce757d 100644 --- a/paddle/fluid/operators/collective/c_wait_compute_op.cc +++ b/paddle/fluid/operators/collective/c_wait_compute_op.cc @@ -43,7 +43,7 @@ class CWaitComputeOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( platform::is_gpu_place(place), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "wait_compute op can run on gpu place only for now, but got %s", place.DebugString())); @@ -62,7 +62,7 @@ class CWaitComputeOp : public framework::OperatorBase { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -94,7 +94,7 @@ class CWaitComputeOp : public framework::OperatorBase { PADDLE_ENFORCE_GPU_SUCCESS(cudaStreamWaitEvent(comm_stream, event, 0)); #endif #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU.")); #endif } diff --git a/paddle/fluid/operators/collective/gen_bkcl_id_op.cc b/paddle/fluid/operators/collective/gen_bkcl_id_op.cc index fc765e3bde983..f7aa3baea0d60 100644 --- a/paddle/fluid/operators/collective/gen_bkcl_id_op.cc +++ b/paddle/fluid/operators/collective/gen_bkcl_id_op.cc @@ -34,10 +34,10 @@ namespace operators { static void GenBKCLID(std::vector* bkcl_ids) { for (size_t i = 0; i < bkcl_ids->size(); ++i) { BKCLResult_t ret = bkcl_get_unique_id(&(*bkcl_ids)[i]); - PADDLE_ENFORCE_EQ(BKCL_SUCCESS, - ret, - platform::errors::PreconditionNotMet( - "bkcl get unique id failed [%d]", ret)); + PADDLE_ENFORCE_EQ( + BKCL_SUCCESS, + ret, + phi::errors::PreconditionNotMet("bkcl get unique id failed [%d]", ret)); } } @@ -49,8 +49,8 @@ static void CopyBKCLIDToVar(const std::vector& bkcl_ids, auto var = scope.FindVar(var_name); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound("Variable with name %s is not found", - var_name.c_str())); + phi::errors::NotFound("Variable with name %s is not found", + var_name.c_str())); auto bkcl_id = var->GetMutable(); memcpy(bkcl_id, &bkcl_ids[i], sizeof(BKCLUniqueId)); } @@ -74,14 +74,14 @@ class GenBKCLIdOp : public framework::OperatorBase { PADDLE_ENFORCE_GE( trainer_id, 0, - platform::errors::InvalidArgument("trainer_id %d is less than 0. Its " - "valid range is [0, trainer_size)")); + phi::errors::InvalidArgument("trainer_id %d is less than 0. Its " + "valid range is [0, trainer_size)")); PADDLE_ENFORCE_LT( trainer_id, static_cast(trainers.size()), - platform::errors::OutOfRange("trainer_id %d is out of range. 
Its valid " - "range is [0, trainer_size)", - trainer_id)); + phi::errors::OutOfRange("trainer_id %d is out of range. Its valid " + "range is [0, trainer_size)", + trainer_id)); int bkcl_comm_num = Attr("bkcl_comm_num"); int use_hierarchical_allreduce = Attr("use_hierarchical_allreduce"); @@ -93,18 +93,18 @@ class GenBKCLIdOp : public framework::OperatorBase { PADDLE_ENFORCE_GT( trainers.size(), 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The number of collective trainers %llu <= 1", trainers.size())); PADDLE_ENFORCE_GT( inter_nranks, 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "inter_nranks %d <= 1 while in hierarchical allreduce mode", inter_nranks)); PADDLE_ENFORCE_EQ( trainers.size() % inter_nranks, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The number of trainers %llu mod inter_nranks %d is not equal 0", trainers.size(), inter_nranks)); diff --git a/paddle/fluid/operators/collective/gen_nccl_id_op.cc b/paddle/fluid/operators/collective/gen_nccl_id_op.cc index 1d03cb151e4a0..37406b2918d7f 100644 --- a/paddle/fluid/operators/collective/gen_nccl_id_op.cc +++ b/paddle/fluid/operators/collective/gen_nccl_id_op.cc @@ -49,8 +49,8 @@ static void CopyNCCLIDToVar(const std::vector& nccl_ids, auto var = scope.FindVar(var_name); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound("Variable with name %s is not found", - var_name.c_str())); + phi::errors::NotFound("Variable with name %s is not found", + var_name.c_str())); auto nccl_id = var->GetMutable(); memcpy(nccl_id, &nccl_ids[i], sizeof(ncclUniqueId)); } @@ -74,14 +74,14 @@ class GenNCCLIdOp : public framework::OperatorBase { PADDLE_ENFORCE_GE( trainer_id, 0, - platform::errors::InvalidArgument("trainer_id %d is less than 0. Its " - "valid range is [0, trainer_size)")); + phi::errors::InvalidArgument("trainer_id %d is less than 0. 
Its " + "valid range is [0, trainer_size)")); PADDLE_ENFORCE_LT( trainer_id, static_cast(trainers.size()), - platform::errors::OutOfRange("trainer_id %d is out of range. Its valid " - "range is [0, trainer_size)", - trainer_id)); + phi::errors::OutOfRange("trainer_id %d is out of range. Its valid " + "range is [0, trainer_size)", + trainer_id)); int nccl_comm_num = Attr("nccl_comm_num"); int use_hierarchical_allreduce = Attr("use_hierarchical_allreduce"); @@ -93,18 +93,18 @@ class GenNCCLIdOp : public framework::OperatorBase { PADDLE_ENFORCE_GT( trainers.size(), 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The number of collective trainers %llu <= 1", trainers.size())); PADDLE_ENFORCE_GT( inter_nranks, 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "inter_nranks %d <= 1 while in hierarchical allreduce mode", inter_nranks)); PADDLE_ENFORCE_EQ( trainers.size() % inter_nranks, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The number of trainers %llu mod inter_nranks %d is not equal 0", trainers.size(), inter_nranks)); diff --git a/paddle/fluid/operators/collective/global_gather_op.cc b/paddle/fluid/operators/collective/global_gather_op.cc index de93ca747b4e9..1b74fc6bde5f7 100644 --- a/paddle/fluid/operators/collective/global_gather_op.cc +++ b/paddle/fluid/operators/collective/global_gather_op.cc @@ -32,18 +32,18 @@ class GlobalGatherOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for global gather op must be non-negative.", ring_id)); auto input_dims = ctx->GetInputDim("X"); auto ndim_input = input_dims.size(); // dim check - PADDLE_ENFORCE_EQ(ndim_input, - 2, - platform::errors::InvalidArgument( - "The input tensor's dimension must be 2. 
" - "But received input's dimension = %d.", - ndim_input)); + PADDLE_ENFORCE_EQ( + ndim_input, + 2, + phi::errors::InvalidArgument("The input tensor's dimension must be 2. " + "But received input's dimension = %d.", + ndim_input)); framework::DDim out_dims = common::make_ddim({-1, -1}); ctx->SetOutputDim("Out", out_dims); } @@ -119,4 +119,4 @@ PD_REGISTER_STRUCT_KERNEL(global_gather, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/global_gather_op.cu.cc b/paddle/fluid/operators/collective/global_gather_op.cu.cc index b3dcc2aac9423..8c0285cba049d 100644 --- a/paddle/fluid/operators/collective/global_gather_op.cu.cc +++ b/paddle/fluid/operators/collective/global_gather_op.cu.cc @@ -41,11 +41,11 @@ struct GlobalGatherFunctor { auto global_count_type = framework::TransToProtoVarType(global_count->dtype()); if (local_count_type != framework::proto::VarType::INT64) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Please use int64 type in local_count.")); } if (global_count_type != framework::proto::VarType::INT64) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Please use int64 type in global_count.")); } auto out = ctx.Output("Out"); @@ -80,7 +80,7 @@ struct GlobalGatherFunctor { PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for global gather op must be non-negative.", ring_id)); auto place = ctx.GetPlace(); @@ -94,7 +94,7 @@ struct GlobalGatherFunctor { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm " @@ -105,7 +105,7 @@ struct GlobalGatherFunctor { 
comm_context_manager.Get(std::to_string(ring_id))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -192,12 +192,11 @@ struct GlobalGatherFunctor { } } #else - PADDLE_THROW( - platform::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); + PADDLE_THROW(phi::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); #endif #else PADDLE_THROW( - platform::errors::Unavailable("PaddlePaddle should compile with GPU.")); + phi::errors::Unavailable("PaddlePaddle should compile with GPU.")); #endif } }; @@ -215,11 +214,11 @@ struct GlobalGatherProcessGroupFunctor { auto global_count_type = framework::TransToProtoVarType(global_count->dtype()); if (local_count_type != framework::proto::VarType::INT64) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Please use int64 type in local_count.")); } if (global_count_type != framework::proto::VarType::INT64) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Please use int64 type in global_count.")); } auto out = ctx.Output("Out"); @@ -251,7 +250,7 @@ struct GlobalGatherProcessGroupFunctor { PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for global gather op must be non-negative.", ring_id)); auto place = ctx.GetPlace(); @@ -309,12 +308,11 @@ struct GlobalGatherProcessGroupFunctor { #endif #else - PADDLE_THROW( - platform::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); + PADDLE_THROW(phi::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); #endif #else PADDLE_THROW( - platform::errors::Unavailable("PaddlePaddle should compile with GPU.")); + phi::errors::Unavailable("PaddlePaddle should compile with GPU.")); #endif } }; @@ -349,4 +347,4 @@ PD_REGISTER_STRUCT_KERNEL(global_gather, double, 
int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/global_gather_op.h b/paddle/fluid/operators/collective/global_gather_op.h index 723c5e48a5ae4..0ab3dd5da985f 100644 --- a/paddle/fluid/operators/collective/global_gather_op.h +++ b/paddle/fluid/operators/collective/global_gather_op.h @@ -29,7 +29,7 @@ template class GlobalGatherOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "Do not support global gather op for cpu kernel now.")); } }; diff --git a/paddle/fluid/operators/collective/global_scatter_op.cc b/paddle/fluid/operators/collective/global_scatter_op.cc index 095f968306bdc..e6b1bb8295bde 100644 --- a/paddle/fluid/operators/collective/global_scatter_op.cc +++ b/paddle/fluid/operators/collective/global_scatter_op.cc @@ -34,18 +34,18 @@ class GlobalScatterOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for global scatter op must be non-negative.", ring_id)); auto input_dims = ctx->GetInputDim("X"); auto ndim_input = input_dims.size(); // dim check - PADDLE_ENFORCE_EQ(ndim_input, - 2, - platform::errors::InvalidArgument( - "The input tensor's dimension must be 2. " - "But received input's dimension = %d.", - ndim_input)); + PADDLE_ENFORCE_EQ( + ndim_input, + 2, + phi::errors::InvalidArgument("The input tensor's dimension must be 2. 
" + "But received input's dimension = %d.", + ndim_input)); framework::DDim out_dims = common::make_ddim({-1, -1}); ctx->SetOutputDim("Out", out_dims); @@ -123,4 +123,4 @@ PD_REGISTER_STRUCT_KERNEL(global_scatter, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/global_scatter_op.cu.cc b/paddle/fluid/operators/collective/global_scatter_op.cu.cc index 096c33c3ef3cc..1eeb23fa602e2 100644 --- a/paddle/fluid/operators/collective/global_scatter_op.cu.cc +++ b/paddle/fluid/operators/collective/global_scatter_op.cu.cc @@ -42,11 +42,11 @@ struct GlobalScatterFunctor { auto global_count_type = framework::TransToProtoVarType(global_count->dtype()); if (local_count_type != framework::proto::VarType::INT64) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Please use int64 type in local_count.")); } if (global_count_type != framework::proto::VarType::INT64) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Please use int64 type in global_count.")); } auto out = ctx.Output("Out"); @@ -79,7 +79,7 @@ struct GlobalScatterFunctor { PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for global scatter op must be non-negative.", ring_id)); @@ -95,7 +95,7 @@ struct GlobalScatterFunctor { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -106,7 +106,7 @@ struct GlobalScatterFunctor { comm_context_manager.Get(std::to_string(ring_id))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); @@ -201,12 +201,11 @@ struct GlobalScatterFunctor { } #else - PADDLE_THROW( - platform::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); + PADDLE_THROW(phi::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); #endif #else PADDLE_THROW( - platform::errors::Unavailable("PaddlePaddle should compile with GPU.")); + phi::errors::Unavailable("PaddlePaddle should compile with GPU.")); #endif } }; @@ -224,11 +223,11 @@ struct GlobalScatterProcessGroupFunctor { auto global_count_type = framework::TransToProtoVarType(global_count->dtype()); if (local_count_type != framework::proto::VarType::INT64) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Please use int64 type in local_count.")); } if (global_count_type != framework::proto::VarType::INT64) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Please use int64 type in global_count.")); } auto out = ctx.Output("Out"); @@ -258,7 +257,7 @@ struct GlobalScatterProcessGroupFunctor { PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for global scatter op must be non-negative.", ring_id)); @@ -316,12 +315,11 @@ struct GlobalScatterProcessGroupFunctor { #endif #else - PADDLE_THROW( - platform::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); + PADDLE_THROW(phi::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); #endif #else PADDLE_THROW( - platform::errors::Unavailable("PaddlePaddle should compile with GPU.")); + phi::errors::Unavailable("PaddlePaddle should compile with GPU.")); #endif } }; @@ -356,4 +354,4 @@ PD_REGISTER_STRUCT_KERNEL(global_scatter, double, int, 
int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/global_scatter_op.h b/paddle/fluid/operators/collective/global_scatter_op.h index fc4b48500c071..36ea0b151dc4b 100644 --- a/paddle/fluid/operators/collective/global_scatter_op.h +++ b/paddle/fluid/operators/collective/global_scatter_op.h @@ -29,7 +29,7 @@ template class GlobalScatterOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "Do not support global scatter op for cpu kernel now.")); } }; diff --git a/paddle/fluid/operators/collective/mp_allreduce_sum_op.cc b/paddle/fluid/operators/collective/mp_allreduce_sum_op.cc index f680818da2d94..d30d52821e74e 100644 --- a/paddle/fluid/operators/collective/mp_allreduce_sum_op.cc +++ b/paddle/fluid/operators/collective/mp_allreduce_sum_op.cc @@ -96,4 +96,4 @@ PD_REGISTER_STRUCT_KERNEL(mp_allreduce_sum, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/mp_allreduce_sum_op.cu.cc b/paddle/fluid/operators/collective/mp_allreduce_sum_op.cu.cc index b4773a8eb5456..fc856ea04e6f2 100644 --- a/paddle/fluid/operators/collective/mp_allreduce_sum_op.cu.cc +++ b/paddle/fluid/operators/collective/mp_allreduce_sum_op.cu.cc @@ -34,5 +34,5 @@ PD_REGISTER_STRUCT_KERNEL(mp_allreduce_sum, #if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/mp_allreduce_sum_op_xpu.cc b/paddle/fluid/operators/collective/mp_allreduce_sum_op_xpu.cc index 9638bf68d1717..323d39f62092e 100644 --- a/paddle/fluid/operators/collective/mp_allreduce_sum_op_xpu.cc +++ b/paddle/fluid/operators/collective/mp_allreduce_sum_op_xpu.cc @@ -28,4 +28,4 @@ PD_REGISTER_STRUCT_KERNEL(mp_allreduce_sum, ops::CAllReduceSumXPUKernel, float, 
int, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/partial_allgather_op.cc b/paddle/fluid/operators/collective/partial_allgather_op.cc index 75220ea5b30a5..3ae33ecd9eeba 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op.cc +++ b/paddle/fluid/operators/collective/partial_allgather_op.cc @@ -26,14 +26,14 @@ class PartialAllGatherOp : public framework::OperatorWithKernel { int nranks = ctx->Attrs().Get("nranks"); int rank = ctx->Attrs().Get("rank"); - PADDLE_ENFORCE_GE(nranks, - 2, - platform::errors::InvalidArgument( - "The value of nranks should be >=2.")); + PADDLE_ENFORCE_GE( + nranks, + 2, + phi::errors::InvalidArgument("The value of nranks should be >=2.")); PADDLE_ENFORCE_EQ( (rank >= 0 && rank < nranks), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank (%d) for partial_allgather op must >=0 and { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -67,7 +67,7 @@ class PartialAllGatherOpCUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); @@ -87,17 +87,17 @@ class PartialAllGatherOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( nranks, real_nranks, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "nranks: %s should equal to %s", nranks, real_nranks)); PADDLE_ENFORCE_EQ(rank, real_rank, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "rank: %s should equal to %s", rank, real_rank)); PADDLE_ENFORCE_EQ( (numel % nranks), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input numel (%d) must be divisible by nranks(%d)", numel, nranks)); @@ -137,7 +137,7 @@ class PartialAllGatherOpCUDAKernel : public framework::OpKernel { } } #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU.")); #endif } @@ -160,5 +160,5 @@ PD_REGISTER_STRUCT_KERNEL(partial_allgather, #endif int, int64_t, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/partial_allgather_op.h b/paddle/fluid/operators/collective/partial_allgather_op.h index 178545f4dd2d3..4b410154712e2 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op.h +++ b/paddle/fluid/operators/collective/partial_allgather_op.h @@ -30,7 +30,7 @@ template class PartialAllGatherOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "Do not support partial_allgather for cpu kernel now.")); } }; diff --git a/paddle/fluid/operators/collective/partial_recv_op.cc 
b/paddle/fluid/operators/collective/partial_recv_op.cc index 5d8a1276a630e..2a512260a792d 100644 --- a/paddle/fluid/operators/collective/partial_recv_op.cc +++ b/paddle/fluid/operators/collective/partial_recv_op.cc @@ -34,26 +34,26 @@ class PartialRecvOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( peer, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The peer (%d) for partial_recv op must be non-negative.", peer)); PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for partial_recv op must be non-negative.", ring_id)); PADDLE_ENFORCE_GE(num, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The num (%d) for partial_send op must >=1", num)); PADDLE_ENFORCE_EQ( (id >= 0 && id < num), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The id (%d) for partial_send op must >=0 and SetOutputDim("Out", common::make_ddim(out_shape)); @@ -137,4 +137,4 @@ PD_REGISTER_STRUCT_KERNEL(partial_recv, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/partial_recv_op.cu.cc b/paddle/fluid/operators/collective/partial_recv_op.cu.cc index 912de046b63af..7e623706b2037 100644 --- a/paddle/fluid/operators/collective/partial_recv_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_recv_op.cu.cc @@ -49,26 +49,26 @@ class PartialRecvOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( rid, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for partial_recv op must be non-negative.", rid)); PADDLE_ENFORCE_GE( peer, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The peer (%d) for partial_recv op must be non-negative.", peer)); PADDLE_ENFORCE_GE(num, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The num (%d) for partial_recv op must >=1", num)); PADDLE_ENFORCE_EQ( (id >= 0 
&& id < num), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The id (%d) for partial_recv op must >=0 and { PADDLE_ENFORCE_EQ( comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -110,7 +110,7 @@ class PartialRecvOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); @@ -134,13 +134,13 @@ class PartialRecvOpCUDAKernel : public framework::OpKernel { stream = ctx.cuda_device_context().stream(); } - PADDLE_ENFORCE_LT(peer, - nranks, - platform::errors::InvalidArgument( - "The value of peer (%d) you set must " - "be less than nranks (%d).", - peer, - nranks)); + PADDLE_ENFORCE_LT( + peer, + nranks, + phi::errors::InvalidArgument("The value of peer (%d) you set must " + "be less than nranks (%d).", + peer, + nranks)); ncclDataType_t dtype = platform::ToNCCLDataType(type); @@ -161,7 +161,7 @@ class PartialRecvOpCUDAKernel : public framework::OpKernel { << offset << "] from " << peer; } #else - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "PaddlePaddle should be compiled with NCCL and " "NCCL version >= 2.7.3 is needed.")); #endif @@ -185,5 +185,5 @@ PD_REGISTER_STRUCT_KERNEL(partial_recv, #endif int, int64_t, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/partial_recv_op.h b/paddle/fluid/operators/collective/partial_recv_op.h index baf47ef9dff8d..0840b85e504b4 100644 --- a/paddle/fluid/operators/collective/partial_recv_op.h +++ b/paddle/fluid/operators/collective/partial_recv_op.h @@ -28,7 +28,7 @@ template class PartialRecvOpCPUKernel : public framework::OpKernel { public: void Compute(const 
framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "Do not support partial_recv for cpu kernel now.")); } }; diff --git a/paddle/fluid/operators/collective/partial_send_op.cc b/paddle/fluid/operators/collective/partial_send_op.cc index a655479d3d8af..388ece7f4ba12 100644 --- a/paddle/fluid/operators/collective/partial_send_op.cc +++ b/paddle/fluid/operators/collective/partial_send_op.cc @@ -31,22 +31,22 @@ class PartialSendOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( peer, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The peer (%d) for partial_send op must be non-negative.", peer)); PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for partial_send op must be non-negative.", ring_id)); PADDLE_ENFORCE_GE(num, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The num (%d) for partial_send op must >=1", num)); PADDLE_ENFORCE_EQ( (id >= 0 && id < num), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The id (%d) for partial_send op must >=0 and { PADDLE_ENFORCE_GE( rid, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for partial_send op must be non-negative.", rid)); PADDLE_ENFORCE_GE( peer, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The peer (%d) for partial_send op must be non-negative.", peer)); PADDLE_ENFORCE_GE(num, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The num (%d) for partial_send op must >=1", num)); PADDLE_ENFORCE_EQ( (id >= 0 && id < num), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The id (%d) for partial_send op must >=0 and { PADDLE_ENFORCE_EQ( comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( 
"You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -104,7 +104,7 @@ class PartialSendCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); @@ -128,13 +128,13 @@ class PartialSendCUDAKernel : public framework::OpKernel { stream = ctx.cuda_device_context().stream(); } - PADDLE_ENFORCE_LT(peer, - nranks, - platform::errors::InvalidArgument( - "The value of peer (%d) you set must " - "be less than ranks (%d).", - peer, - nranks)); + PADDLE_ENFORCE_LT( + peer, + nranks, + phi::errors::InvalidArgument("The value of peer (%d) you set must " + "be less than ranks (%d).", + peer, + nranks)); ncclDataType_t dtype = platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())); @@ -157,9 +157,9 @@ class PartialSendCUDAKernel : public framework::OpKernel { << offset << "] to " << peer; } #else - PADDLE_THROW(platform::errors::Unavailable( - "PaddlePaddle should be compiled with NCCL " - "and NCCL version >= 2.7.3 is needed.")); + PADDLE_THROW( + phi::errors::Unavailable("PaddlePaddle should be compiled with NCCL " + "and NCCL version >= 2.7.3 is needed.")); #endif } }; @@ -181,5 +181,5 @@ PD_REGISTER_STRUCT_KERNEL(partial_send, #endif int, int64_t, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/partial_send_op.h b/paddle/fluid/operators/collective/partial_send_op.h index b7b72789b87ff..9076ce014fcab 100644 --- a/paddle/fluid/operators/collective/partial_send_op.h +++ b/paddle/fluid/operators/collective/partial_send_op.h @@ -29,7 +29,7 @@ template class PartialSendOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "Do not support 
partial_send for cpu kernel now.")); } }; diff --git a/paddle/fluid/operators/collective/recv_v2_op.cc b/paddle/fluid/operators/collective/recv_v2_op.cc index 40757ca89daa8..1448aad5f9bfa 100644 --- a/paddle/fluid/operators/collective/recv_v2_op.cc +++ b/paddle/fluid/operators/collective/recv_v2_op.cc @@ -30,12 +30,12 @@ class RecvOpV2 : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( peer, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The peer (%d) for recv_v2 op must be non-negative.", peer)); PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for recv_v2 op must be non-negative.", ring_id)); if (ctx->GetOutputsVarType("Out").front() == @@ -44,7 +44,7 @@ class RecvOpV2 : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( out_shape.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of the output shape must be greater than 0 " "but the value given is %d.", out_shape.size())); @@ -55,7 +55,7 @@ class RecvOpV2 : public framework::OperatorWithKernel { for (size_t i = 0; i < out_shape.size(); ++i) { PADDLE_ENFORCE_GE(out_shape[i], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape attribute for recv_v2 must be set " "explicitly, but the %dth element is %d which " "is less than 1. 
Or dynamic_shape should be " @@ -122,4 +122,4 @@ PD_REGISTER_STRUCT_KERNEL(recv_v2, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/recv_v2_op.cu.cc b/paddle/fluid/operators/collective/recv_v2_op.cu.cc index 37cbf9dffdd3d..be849d7e6c53b 100644 --- a/paddle/fluid/operators/collective/recv_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/recv_v2_op.cu.cc @@ -41,7 +41,7 @@ framework::DDim recv_shape_info(const platform::Place &place, PADDLE_ENFORCE_EQ( ((stream != nullptr && comm != nullptr) || comm_ctx != nullptr), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "NCCLComm and Stream should be provided if use NCCL " "to send the shape info.")); } @@ -131,14 +131,14 @@ class RecvOpV2CUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( rid, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for recv_v2 op must be non-negative.", rid)); int peer = ctx.Attr("peer"); PADDLE_ENFORCE_GE( peer, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The peer (%d) for recv_v2 op must be non-negative.", peer)); gpuStream_t stream = nullptr; @@ -180,7 +180,7 @@ class RecvOpV2CUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -191,20 +191,20 @@ class RecvOpV2CUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); VLOG(3) << "new comm_context_manager has rid " << rid; } else { comm = platform::NCCLCommContext::Instance().Get(rid, place); - PADDLE_ENFORCE_LT(peer, - comm->nranks(), - platform::errors::InvalidArgument( - "The value of peer (%d) you set must " - "be less than comm->nranks (%d).", - peer, - comm->nranks())); + PADDLE_ENFORCE_LT( + peer, + comm->nranks(), + phi::errors::InvalidArgument("The value of peer (%d) you set must " + "be less than comm->nranks (%d).", + peer, + comm->nranks())); stream = comm->stream(); VLOG(3) << "old NCCLCommContext has rid " << rid; } @@ -223,8 +223,8 @@ class RecvOpV2CUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dynamic_shape, false, - platform::errors::InvalidArgument("Dynamic shape for send/recv not " - "support LoDTensorArray for now.")); + phi::errors::InvalidArgument("Dynamic shape for send/recv not " + "support LoDTensorArray for now.")); auto out_array = out_var->GetMutable(); for (size_t idx = 0; idx < out_array->size(); ++idx) { VLOG(3) << "LodTensorArray: idx(" << idx << ")"; @@ -267,20 +267,20 @@ class RecvOpV2CUDAKernel : public framework::OpKernel { comm_ctx->Recv(out, numel, peer, stream); } else { comm = platform::NCCLCommContext::Instance().Get(rid, place); - PADDLE_ENFORCE_LT(peer, - comm->nranks(), - platform::errors::InvalidArgument( - "The value of peer (%d) you set must " - "be less than comm->nranks (%d).", - peer, - comm->nranks())); + PADDLE_ENFORCE_LT( + peer, + comm->nranks(), + phi::errors::InvalidArgument("The value of peer (%d) you set must " + "be less than comm->nranks (%d).", + peer, + comm->nranks())); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclRecv( 
out->data(), numel, dtype, peer, comm->comm(), stream)); VLOG(3) << "rank " << comm->rank() << " recv " << common::product(out->dims()) << " from " << peer; } #else - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "PaddlePaddle should be compiled with NCCL and " "NCCL version >= 2.7.3 is needed.")); #endif @@ -305,5 +305,5 @@ PD_REGISTER_STRUCT_KERNEL(recv_v2, int, int64_t, int8_t, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/recv_v2_op.h b/paddle/fluid/operators/collective/recv_v2_op.h index e76e4a7b55197..47b1941a73442 100644 --- a/paddle/fluid/operators/collective/recv_v2_op.h +++ b/paddle/fluid/operators/collective/recv_v2_op.h @@ -28,8 +28,8 @@ template class RecvOpV2CPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( - "Do not support recv for cpu kernel now.")); + PADDLE_THROW( + phi::errors::Unavailable("Do not support recv for cpu kernel now.")); } }; diff --git a/paddle/fluid/operators/collective/send_v2_op.cc b/paddle/fluid/operators/collective/send_v2_op.cc index 862a6a67813c1..c1763a5cd6478 100644 --- a/paddle/fluid/operators/collective/send_v2_op.cc +++ b/paddle/fluid/operators/collective/send_v2_op.cc @@ -28,12 +28,12 @@ class SendOpV2 : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( peer, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The peer (%d) for send_v2 op must be non-negative.", peer)); PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for send_v2 op must be non-negative.", ring_id)); } @@ -94,4 +94,4 @@ PD_REGISTER_STRUCT_KERNEL(send_v2, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/send_v2_op.cu.cc b/paddle/fluid/operators/collective/send_v2_op.cu.cc index 
8c72a7ccd384c..6938f413b0548 100644 --- a/paddle/fluid/operators/collective/send_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/send_v2_op.cu.cc @@ -41,7 +41,7 @@ void send_shape_info(const phi::DenseTensor& x, PADDLE_ENFORCE_EQ( ((stream != nullptr && comm != nullptr) || comm_ctx != nullptr), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "NCCLComm and Stream should be provided if use NCCL " "to send the shape info.")); } @@ -129,14 +129,14 @@ class SendOpV2CUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( rid, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for send_v2 op must be non-negative.", rid)); int peer = ctx.Attr("peer"); PADDLE_ENFORCE_GE( peer, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The peer (%d) for send_v2 op must be non-negative.", peer)); auto map = distributed::ProcessGroupMapFromGid::getInstance(); if (map->has(rid)) { @@ -171,7 +171,7 @@ class SendOpV2CUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -182,20 +182,20 @@ class SendOpV2CUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); VLOG(3) << "new comm_context_manager has rid " << rid; } else { comm = platform::NCCLCommContext::Instance().Get(rid, place); - PADDLE_ENFORCE_LT(peer, - comm->nranks(), - platform::errors::InvalidArgument( - "The value of peer (%d) you set must " - "be less than comm->nranks (%d).", - peer, - comm->nranks())); + PADDLE_ENFORCE_LT( + peer, + comm->nranks(), + phi::errors::InvalidArgument("The value of peer (%d) you set must " + "be less than comm->nranks (%d).", + peer, + comm->nranks())); stream = comm->stream(); VLOG(3) << "old NCCLCommContext has rid " << rid; } @@ -210,8 +210,8 @@ class SendOpV2CUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dynamic_shape, false, - platform::errors::InvalidArgument("Dynamic shape for send/recv not " - "support LoDTensorArray for now.")); + phi::errors::InvalidArgument("Dynamic shape for send/recv not " + "support LoDTensorArray for now.")); auto& x_array = x_var->Get(); for (size_t idx = 0; idx < x_array.size(); idx++) { VLOG(3) << "LodTensorArray: idx(" << idx << ")"; @@ -255,9 +255,9 @@ class SendOpV2CUDAKernel : public framework::OpKernel { << common::product(x->dims()) << " to " << peer; } #else - PADDLE_THROW(platform::errors::Unavailable( - "PaddlePaddle should be compiled with NCCL " - "and NCCL version >= 2.7.3 is needed.")); + PADDLE_THROW( + phi::errors::Unavailable("PaddlePaddle should be compiled with NCCL " + "and NCCL version >= 2.7.3 is needed.")); #endif } }; @@ -280,5 +280,5 @@ PD_REGISTER_STRUCT_KERNEL(send_v2, int, int64_t, int8_t, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/send_v2_op.h 
b/paddle/fluid/operators/collective/send_v2_op.h index 7f51861008942..196e2941e9315 100644 --- a/paddle/fluid/operators/collective/send_v2_op.h +++ b/paddle/fluid/operators/collective/send_v2_op.h @@ -29,8 +29,8 @@ template class SendOpV2CPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( - "Do not support send for cpu kernel now.")); + PADDLE_THROW( + phi::errors::Unavailable("Do not support send for cpu kernel now.")); } }; diff --git a/paddle/fluid/operators/common_infer_shape_functions.cc b/paddle/fluid/operators/common_infer_shape_functions.cc index 1c13f873818f4..0c83eeb6da92e 100644 --- a/paddle/fluid/operators/common_infer_shape_functions.cc +++ b/paddle/fluid/operators/common_infer_shape_functions.cc @@ -37,13 +37,13 @@ inline void GetBroadcastDimsArrays(const framework::DDim &x_dims, PADDLE_ENFORCE_GE( axis, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Axis should be great than or equal to 0, but received axis is %d.", axis)); PADDLE_ENFORCE_LE( axis, max_dim, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Axis should be less than or equal to %d, but received axis is %d.", max_dim, axis)); @@ -68,7 +68,7 @@ inline void GetBroadcastDimsArrays(const framework::DDim &x_dims, x_dims_array[i] == y_dims_array[i] || x_dims_array[i] <= 1 || y_dims_array[i] <= 1, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Broadcast dimension mismatch. Operands could " "not be broadcast together with the shape of X = [%s] and " "the shape of Y = [%s]. Received [%d] in X is not equal to " @@ -126,7 +126,7 @@ void UnaryOpUnchangedInferShapeCheckAxis(framework::InferShapeContext *ctx) { PADDLE_ENFORCE_GE( axis, -x_rank, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Attr(axis) value should be in range [-R, R-1], " "R is the rank of Input(X). 
But received axis: %d, R: %d.", axis, @@ -134,7 +134,7 @@ void UnaryOpUnchangedInferShapeCheckAxis(framework::InferShapeContext *ctx) { PADDLE_ENFORCE_LT( axis, x_rank, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Attr(axis) value should be in range [-R, R-1], " "R is the rank of Input(X). But received axis: %d, R: %d.", axis, @@ -153,7 +153,7 @@ void BinaryOpBroadcastInferShape(framework::InferShapeContext *ctx) { PADDLE_ENFORCE_EQ( ctx->GetInputsVarType(y_name).front(), framework::proto::VarType::LOD_TENSOR, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The var type of input %s should be phi::DenseTensor, but got %s.", ctx->Inputs(y_name).front(), ctx->GetInputsVarType(y_name).front())); @@ -162,7 +162,7 @@ void BinaryOpBroadcastInferShape(framework::InferShapeContext *ctx) { framework::proto::VarType::SELECTED_ROWS) { PADDLE_ENFORCE_EQ(y_dims.size(), 1u, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "For binary broadcastable operator, if X is " "Sparse(VarType.SELECTED_ROWS" "), Y must be scalar, and the size of Y should be 1. " @@ -171,7 +171,7 @@ void BinaryOpBroadcastInferShape(framework::InferShapeContext *ctx) { PADDLE_ENFORCE_EQ( y_dims[0], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "For binary broadcastable operator, if X is " "Sparse(VarType.SELECTED_ROWS" "), Y must be scalar, the first dimension of Y should be 1. 
" @@ -179,7 +179,7 @@ void BinaryOpBroadcastInferShape(framework::InferShapeContext *ctx) { y_dims[0])); } else if (ctx->GetInputsVarType(x_name).front() != framework::proto::VarType::LOD_TENSOR) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "For binary broadcastable operator, the var type of input X should " "be LOD_TENSOR, but got %s", ctx->GetInputsVarType(x_name).front())); diff --git a/paddle/fluid/operators/controlflow/conditional_block_infer_op.cc b/paddle/fluid/operators/controlflow/conditional_block_infer_op.cc index e684efe12c598..9f3034179fdd7 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_infer_op.cc +++ b/paddle/fluid/operators/controlflow/conditional_block_infer_op.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include "paddle/fluid/operators/controlflow/conditional_block_op.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif #include "paddle/common/flags.h" @@ -72,7 +72,7 @@ class ConditionalBlockInferOp : public ConditionalOp { auto *scope_var = scope.FindVar(Output("Scope")); PADDLE_ENFORCE_NOT_NULL( scope_var, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Scope must be set in ConditionalBlockInferOp.")); auto *scopes = scope_var->GetMutable>(); scopes->resize(1); diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.cc b/paddle/fluid/operators/controlflow/conditional_block_op.cc index 981bf0f8b00f5..3b320dd3f7912 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_op.cc +++ b/paddle/fluid/operators/controlflow/conditional_block_op.cc @@ -20,7 +20,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/controlflow/control_flow_op_helper.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif COMMON_DECLARE_bool(use_mkldnn); @@ -73,7 +73,7 @@ class ConditionalBlockOp : public ConditionalOp { auto *scope_var = scope.FindVar(Output(ConditionalOp::kScope)); PADDLE_ENFORCE_NOT_NULL( scope_var, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Expect Scope variable to be set in conditional_block_op, but " "got a null Scope variable. Please set the Scope variable.")); @@ -139,7 +139,7 @@ class ConditionalBlockInferShape : public framework::InferShapeBase { void operator()(framework::InferShapeContext *context) const override { PADDLE_ENFORCE_EQ(context->HasInputs(ConditionalOp::kCondition), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "conditional_block_op must have condition input.")); } }; @@ -180,14 +180,14 @@ class ConditionalBlockGradOp : public ConditionalOp { auto *scope_var = scope.FindVar(Input(ConditionalOp::kScope)); PADDLE_ENFORCE_NOT_NULL( scope_var, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Expect Scope variable to be set in conditional_block_op, but " "got a null Scope variable. 
Please set the Scope variable.")); auto &scopes = scope_var->Get>(); PADDLE_ENFORCE_GT( scopes.size(), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expect Scope variable contains at least 1 scope, but got: %d", scopes.size())); framework::Scope &cur_scope = *(scopes[0]); @@ -272,7 +272,7 @@ class ConditionalBlockGradInferShape : public framework::InferShapeBase { PADDLE_ENFORCE_EQ( context->HasInputs(ConditionalOp::kCondition), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Condition must be set in conditional_block_grad_op.")); if (context->HasInputs(ConditionalOp::kInputs) && context->HasOutputs(framework::GradVarName(ConditionalOp::kInputs))) { @@ -294,7 +294,7 @@ class ConditionalBlockGradInferVarType : public framework::VarTypeInference { ctx->OutputSize(framework::GradVarName(ConditionalOp::kInputs)); PADDLE_ENFORCE_EQ(input_size, output_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "input_size and output_size should be equal for " "conditional_block_grad_op.")); for (size_t i = 0; i < output_size; ++i) { diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.h b/paddle/fluid/operators/controlflow/conditional_block_op.h index 0f04a295ed263..7b24ec5629a48 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_op.h +++ b/paddle/fluid/operators/controlflow/conditional_block_op.h @@ -53,7 +53,7 @@ class ConditionalOp : public framework::OperatorBase { [&scope](const std::string &var_name) -> const phi::DenseTensor * { auto *var = scope.FindVar(var_name); PADDLE_ENFORCE_NOT_NULL(var, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Cannot find variable %s", var_name)); return &var->Get(); }); @@ -64,14 +64,14 @@ class ConditionalOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( ips.size() == 1UL && ips[0]->IsInitialized(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "condition should have one 
initialized input as condition")); PADDLE_ENFORCE_EQ(framework::TransToProtoVarType(ips[0]->dtype()) == framework::proto::VarType::BOOL && ips[0]->numel() == 1, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "condition input's data type should be bool, " "numel should be 1, actual numel is %d", ips[0]->numel())); diff --git a/paddle/fluid/operators/controlflow/conditional_block_op_helper.cc b/paddle/fluid/operators/controlflow/conditional_block_op_helper.cc index 08569d835fd82..2908d1f5a5f81 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_op_helper.cc +++ b/paddle/fluid/operators/controlflow/conditional_block_op_helper.cc @@ -38,7 +38,7 @@ static void FindAllConditionalBlockAndConditionalBlockGradOp( PADDLE_ENFORCE_GE( fwd_ops->size(), bwd_ops->size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Size of forward ops must be greater or equal to backward ops. The " "number of forward ops is %d and the number of backward ops is %d", fwd_ops->size(), @@ -59,7 +59,7 @@ static void FindAllConditionalBlockAndConditionalBlockGradOp( PADDLE_ENFORCE_GE( fwd_ops->size(), bwd_ops->size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "There are more conditional_block_grad ops than " "conditional_block ops in the graph or program. 
The number of " "forward ops is %d and the number of backward ops is %d", @@ -122,7 +122,7 @@ static void PrepareSafeEagerDeletionOnConditionalOpAndConditionalGradOpImpl( bwd_op)) { PADDLE_ENFORCE_EQ(matched_fwd_op, nullptr, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Found multiple matched conditional_block ops.")); matched_fwd_op = &fwd_op; } @@ -130,7 +130,7 @@ static void PrepareSafeEagerDeletionOnConditionalOpAndConditionalGradOpImpl( PADDLE_ENFORCE_NOT_NULL( matched_fwd_op, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Cannot find matched forward conditional_block op.")); SetSkipVarsForConditionalBlockOp(const_cast(matched_fwd_op), diff --git a/paddle/fluid/operators/controlflow/control_flow_op_helper.h b/paddle/fluid/operators/controlflow/control_flow_op_helper.h index 0d08ae6d68663..945bcbb4e905e 100644 --- a/paddle/fluid/operators/controlflow/control_flow_op_helper.h +++ b/paddle/fluid/operators/controlflow/control_flow_op_helper.h @@ -96,7 +96,7 @@ static void AssignZeroToParentScope( PADDLE_ENFORCE_EQ( outside_var->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Type of outside_var %s is NOT phi::DenseTensor, which " "doesn't match input_var %s.", outside_grad_name, @@ -108,7 +108,7 @@ static void AssignZeroToParentScope( } else if (input_var->IsType()) { PADDLE_ENFORCE_EQ(outside_var->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Type of outside_var %s is NOT LoDTensorArray, " "which doesn't match input_var %s.", outside_grad_name, @@ -121,7 +121,7 @@ static void AssignZeroToParentScope( } PADDLE_ENFORCE_EQ(input_tensors.size(), outside_tensors->size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "LoDTensorArray outside_var %s doen't have same " "size as input_var %s.", outside_grad_name, @@ -132,7 +132,7 @@ static void AssignZeroToParentScope( } } else { // TODO(huihuangzheng): add support 
for SelectedRows - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Conditional block grad op doesn't support non-phi::DenseTensor " "output " "now.")); diff --git a/paddle/fluid/operators/controlflow/depend_op.cc b/paddle/fluid/operators/controlflow/depend_op.cc index 925990ba3ba5f..58ed498ad1b9e 100644 --- a/paddle/fluid/operators/controlflow/depend_op.cc +++ b/paddle/fluid/operators/controlflow/depend_op.cc @@ -50,8 +50,8 @@ class DependOp : public framework::OperatorBase { auto out_name = Output("Out"); PADDLE_ENFORCE_EQ(x_name, out_name, - platform::errors::PreconditionNotMet( - "Input(X) and Output(Out) varibale should be the " + phi::errors::PreconditionNotMet( + "Input(X) and Output(Out) variable should be the " "same, but got Input is %s and Output is %s.", x_name, out_name)); diff --git a/paddle/fluid/operators/controlflow/feed_op.cc b/paddle/fluid/operators/controlflow/feed_op.cc index 7d0d899e8b6c3..141b13a71164b 100644 --- a/paddle/fluid/operators/controlflow/feed_op.cc +++ b/paddle/fluid/operators/controlflow/feed_op.cc @@ -34,7 +34,7 @@ const framework::FeedType& CheckAndGetFeedItem(const phi::ExtendedTensor& x, int col) { PADDLE_ENFORCE_GE(col, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expected the column index (the attribute 'col' of " "operator 'Feed') of current feeding variable to be " "no less than 0. But received column index = %d.", @@ -43,7 +43,7 @@ const framework::FeedType& CheckAndGetFeedItem(const phi::ExtendedTensor& x, PADDLE_ENFORCE_LT( static_cast(col), feed_list->size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The column index of current feeding variable is expected to be " "less than the length of feeding list. 
But received column index = " "%d, the length of feeding list = %d", @@ -60,7 +60,7 @@ void FeedDenseTensorKernel(const Context& dev_ctx, phi::DenseTensor* out) { PADDLE_ENFORCE_NOT_NULL( out, - platform::errors::NotFound( + phi::errors::NotFound( "Output cannot be found in scope for operator 'Feed'")); const auto& feed_item = CheckAndGetFeedItem(x, col); const auto& in_tensor = paddle::get(feed_item); @@ -81,7 +81,7 @@ void FeedSparseCooTensorKernel(const Context& dev_ctx, phi::SparseCooTensor* out) { PADDLE_ENFORCE_NOT_NULL( out, - platform::errors::NotFound( + phi::errors::NotFound( "Output cannot be found in scope for operator 'Feed'")); const auto& feed_item = CheckAndGetFeedItem(x, col); const auto& in_tensor = paddle::get(feed_item); @@ -103,7 +103,7 @@ void FeedStringsKernel(const Context& dev_ctx UNUSED, phi::ExtendedTensor* out) { PADDLE_ENFORCE_NOT_NULL( out, - platform::errors::NotFound( + phi::errors::NotFound( "Output cannot be found in scope for operator 'Feed'")); const auto& feed_item = CheckAndGetFeedItem(x, col); auto strs_out = static_cast(out); diff --git a/paddle/fluid/operators/controlflow/fetch_op.cc b/paddle/fluid/operators/controlflow/fetch_op.cc index d3b4b086470a0..c9ceb1f3e01b2 100644 --- a/paddle/fluid/operators/controlflow/fetch_op.cc +++ b/paddle/fluid/operators/controlflow/fetch_op.cc @@ -76,7 +76,7 @@ class FetchOp : public framework::OperatorBase { auto *fetch_var = scope.FindVar(fetch_var_name); PADDLE_ENFORCE_NOT_NULL( fetch_var, - platform::errors::NotFound( + phi::errors::NotFound( "Input variable(%s) cannot be found in scope for operator 'Fetch'." 
"Confirm that you have used the fetch `Variable` format " "instead of the string literal('%s') in `fetch_list` " @@ -91,15 +91,15 @@ class FetchOp : public framework::OperatorBase { auto *out_var = scope.FindVar(out_name); PADDLE_ENFORCE_NOT_NULL( out_var, - platform::errors::NotFound("Output variable(%s) cannot be found " - "in scope for operator 'Fetch'.", - out_name)); + phi::errors::NotFound("Output variable(%s) cannot be found " + "in scope for operator 'Fetch'.", + out_name)); int col = Attr("col"); PADDLE_ENFORCE_GE( col, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expected the column index (the attribute 'col' of " "operator 'Fetch') of current fetching variable to be " "no less than 0. But received column index = %d.", diff --git a/paddle/fluid/operators/controlflow/fetch_v2_op.cc b/paddle/fluid/operators/controlflow/fetch_v2_op.cc index 8e811c20b28ff..591d3bed324d3 100644 --- a/paddle/fluid/operators/controlflow/fetch_v2_op.cc +++ b/paddle/fluid/operators/controlflow/fetch_v2_op.cc @@ -128,14 +128,14 @@ class FetchV2Kernel { PADDLE_ENFORCE_EQ( ctx.HasOutput("Out"), true, - platform::errors::NotFound("Output(Out) of fetch_v2_op is not found.")); + phi::errors::NotFound("Output(Out) of fetch_v2_op is not found.")); auto *out_var = ctx.OutputVar("Out"); int col = ctx.Attr("col"); PADDLE_ENFORCE_GE( col, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expected the column index (the attribute 'col' of " "operator 'Fetch') of current fetching variable to be " "no less than 0. 
But received column index = %d.", @@ -163,8 +163,8 @@ class FetchV2Kernel { PADDLE_ENFORCE_EQ( check_place, true, - platform::errors::InvalidArgument("Tensor's place of input(X) must " - "be CPUPlace or CUDAPinnedPlace.")); + phi::errors::InvalidArgument("Tensor's place of input(X) must " + "be CPUPlace or CUDAPinnedPlace.")); if (deepcopy) { DeepCopy(src_item, fetch_var_name, dst_item); } else { @@ -186,7 +186,7 @@ class FetchV2Kernel { for (size_t i = 0; i < src_item.size(); ++i) { PADDLE_ENFORCE_EQ(platform::is_cpu_place(src_item[i].place()), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Tensor's place of input(X) must be CPUPlace.")); if (deepcopy) { DeepCopy(src_item[i], fetch_var_name, &dst_item[i]); @@ -244,7 +244,7 @@ PD_REGISTER_STRUCT_KERNEL(fetch_v2, int64_t, uint8_t, bool, - plat::float16, + phi::dtype::float16, plat::bfloat16, plat::complex, plat::complex) {} diff --git a/paddle/fluid/operators/controlflow/get_places_op.cc b/paddle/fluid/operators/controlflow/get_places_op.cc index 9262ca59af970..e9b9d21d57399 100644 --- a/paddle/fluid/operators/controlflow/get_places_op.cc +++ b/paddle/fluid/operators/controlflow/get_places_op.cc @@ -66,8 +66,8 @@ class GetPlacesOp : public framework::OperatorBase { PADDLE_ENFORCE_NE( device_count, 0UL, - platform::errors::InvalidArgument("Cannot indicate %s device count", - is_gpu ? "GPU" : "CPU")); + phi::errors::InvalidArgument("Cannot indicate %s device count", + is_gpu ? 
"GPU" : "CPU")); auto out_var_name = Output("Out"); auto &places = @@ -78,7 +78,7 @@ class GetPlacesOp : public framework::OperatorBase { if (is_gpu) { PADDLE_ENFORCE_LE(device_count, CUDADevCount(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only %d CUDA devices found, cannot set to %d", CUDADevCount(), device_count)); diff --git a/paddle/fluid/operators/controlflow/logical_op_xpu.h b/paddle/fluid/operators/controlflow/logical_op_xpu.h index 614db61558f79..8fde735d99936 100644 --- a/paddle/fluid/operators/controlflow/logical_op_xpu.h +++ b/paddle/fluid/operators/controlflow/logical_op_xpu.h @@ -82,7 +82,7 @@ class BinaryLogicalOpXPUKernel : public framework::OpKernel { bcast_ydims); PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( + phi::errors::External( "XPU broadcast kernel return wrong value[%d %s]", ret, XPUAPIErrorMsg[ret])); @@ -118,7 +118,7 @@ class BinaryLogicalOpXPUKernel : public framework::OpKernel { bcast_ydims); PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( + phi::errors::External( "XPU broadcast kernel return wrong value[%d %s]", ret, XPUAPIErrorMsg[ret])); @@ -144,11 +144,11 @@ class BinaryLogicalOpXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( ret, XPU_SUCCESS, - platform::errors::External("XPU API return wrong value[%d %s] in " - "op_name[%s].", - ret, - XPUAPIErrorMsg[ret], - XpuLogicalType2Str(xpu_type))); + phi::errors::External("XPU API return wrong value[%d %s] in " + "op_name[%s].", + ret, + XPUAPIErrorMsg[ret], + XpuLogicalType2Str(xpu_type))); if (need_broad_cast && dev_ctx.x_context()->xpu_stream != nullptr) { dev_ctx.Wait(); @@ -178,7 +178,7 @@ class UnaryLogicalOpXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( ret, XPU_SUCCESS, - platform::errors::External( + phi::errors::External( "XPU API return wrong value[%d %s].", ret, XPUAPIErrorMsg[ret])); } }; diff --git a/paddle/fluid/operators/controlflow/op_variant.cc 
b/paddle/fluid/operators/controlflow/op_variant.cc index 8d43a21e66437..0976ff36e63b2 100644 --- a/paddle/fluid/operators/controlflow/op_variant.cc +++ b/paddle/fluid/operators/controlflow/op_variant.cc @@ -70,11 +70,10 @@ void AppendOpVariantByOpName(const std::vector &op_descs, std::vector *result_ops) { PADDLE_ENFORCE_NOT_NULL( result_ops, - platform::errors::Unavailable("result_ops should not be a null_ptr.")); + phi::errors::Unavailable("result_ops should not be a null_ptr.")); for (auto *op_desc : op_descs) { PADDLE_ENFORCE_NOT_NULL( - op_desc, - platform::errors::Unavailable("op_desc should not be a null_ptr.")); + op_desc, phi::errors::Unavailable("op_desc should not be a null_ptr.")); if (op_desc->Type() == candidate_op_name) { result_ops->emplace_back(op_desc); } @@ -87,11 +86,10 @@ void AppendOpVariantByOpName( std::unordered_set *result_ops) { PADDLE_ENFORCE_NOT_NULL( result_ops, - platform::errors::Unavailable("result_ops should not be a null_ptr.")); + phi::errors::Unavailable("result_ops should not be a null_ptr.")); for (auto *op_desc : op_descs) { PADDLE_ENFORCE_NOT_NULL( - op_desc, - platform::errors::Unavailable("op_desc should not be a null_ptr.")); + op_desc, phi::errors::Unavailable("op_desc should not be a null_ptr.")); if (op_desc->Type() == candidate_op_name) { result_ops->emplace(op_desc); } diff --git a/paddle/fluid/operators/controlflow/op_variant.h b/paddle/fluid/operators/controlflow/op_variant.h index ad7cc6b741eb9..ed13a0285c375 100644 --- a/paddle/fluid/operators/controlflow/op_variant.h +++ b/paddle/fluid/operators/controlflow/op_variant.h @@ -49,10 +49,9 @@ class OpVariant { const AttrType &Attr(const std::string &name) const { auto &attrs = Attrs(); auto it = attrs.find(name); - PADDLE_ENFORCE_NE( - it, - attrs.end(), - platform::errors::NotFound("Cannot find attribute %s.", name)); + PADDLE_ENFORCE_NE(it, + attrs.end(), + phi::errors::NotFound("Cannot find attribute %s.", name)); return PADDLE_GET_CONST(AttrType, it->second); } 
diff --git a/paddle/fluid/operators/controlflow/pylayer_op.cc b/paddle/fluid/operators/controlflow/pylayer_op.cc index bd83c99a0c62d..57bce4224770a 100644 --- a/paddle/fluid/operators/controlflow/pylayer_op.cc +++ b/paddle/fluid/operators/controlflow/pylayer_op.cc @@ -95,7 +95,7 @@ class PyLayerForwardOp : public PyLayerOp { auto *scope_var = scope.FindVar(Output(kScope)); PADDLE_ENFORCE_NOT_NULL( scope_var, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Expect Scope variable to be set in pylayer_op, but " "got a null Scope variable. Please set the Scope variable.")); @@ -109,7 +109,7 @@ class PyLayerForwardOp : public PyLayerOp { PADDLE_ENFORCE_GT( blocks.size(), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expect blocks contains at least 1 block, but got: %d", blocks.size())); @@ -123,7 +123,7 @@ class PyLayerForwardOp : public PyLayerOp { LOG_FIRST_N(INFO, 1) << "[ControlFlow][PyLayer] New Executor is Running."; CreateInterpreter(dev_place, *forward_block, &cur_scope, skip_vars); - PADDLE_ENFORCE_NOT_NULL(core_, platform::errors::Fatal("core_ is nullptr")); + PADDLE_ENFORCE_NOT_NULL(core_, phi::errors::Fatal("core_ is nullptr")); core_->Run({}, false); } }; @@ -156,7 +156,7 @@ class PyLayerBackwardMaker : public framework::SingleGradOpMaker { PADDLE_ENFORCE_GT( blocks.size(), static_cast(PyLayerBlockIndex::kBACKWARD), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expect blocks contains at least 2 block, but got: %d", blocks.size())); grad_op->SetBlockAttr( @@ -188,7 +188,7 @@ class PyLayerBackwardOp : public PyLayerOp { PADDLE_ENFORCE_EQ( inside_grads.size(), outside_grads.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Mismatch inside_grads.size(): %d, and outside_grads.size(): %d", inside_grads.size(), outside_grads.size())); @@ -196,14 +196,14 @@ class PyLayerBackwardOp : public PyLayerOp { auto *scope_var = scope.FindVar(Input(PyLayerOp::kScope)); 
PADDLE_ENFORCE_NOT_NULL( scope_var, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Expect Scope variable to be set in pylayer_op, but " "got a null Scope variable. Please set the Scope variable.")); auto &scopes = scope_var->Get>(); PADDLE_ENFORCE_GT( scopes.size(), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expect Scope variable contains at least 1 scope, but got: %d", scopes.size())); framework::Scope &cur_scope = *(scopes[0]); @@ -216,7 +216,7 @@ class PyLayerBackwardOp : public PyLayerOp { << "[ControlFlow][PyLayerBackwardOp] New Executor is Running."; CreateInterpreter(dev_place, *backward_block, &cur_scope, inside_grads); - PADDLE_ENFORCE_NOT_NULL(core_, platform::errors::Fatal("core_ is nullptr")); + PADDLE_ENFORCE_NOT_NULL(core_, phi::errors::Fatal("core_ is nullptr")); core_->Run({}, false); @@ -252,7 +252,7 @@ class PyLayerBackwardInferVarType : public framework::VarTypeInference { ctx->OutputSize(framework::GradVarName(PyLayerOp::kInputs)); PADDLE_ENFORCE_EQ(forward_input_size, backward_output_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "input_size and output_size should be equal for " "pylayer_grad op.")); for (size_t i = 0; i < backward_output_size; ++i) { diff --git a/paddle/fluid/operators/controlflow/pylayer_op_helper.cc b/paddle/fluid/operators/controlflow/pylayer_op_helper.cc index 9dc53d428ef1d..bdd669c644e6e 100644 --- a/paddle/fluid/operators/controlflow/pylayer_op_helper.cc +++ b/paddle/fluid/operators/controlflow/pylayer_op_helper.cc @@ -38,7 +38,7 @@ static void FindAllPyLayerOpAndPyLayerGradOp( PADDLE_ENFORCE_GE( fwd_ops->size(), bwd_ops->size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Size of forward ops must be greater or equal to backward ops. 
The " "number of forward ops is %d and the number of backward ops is %d", fwd_ops->size(), @@ -59,7 +59,7 @@ static void FindAllPyLayerOpAndPyLayerGradOp( PADDLE_ENFORCE_GE( fwd_ops->size(), bwd_ops->size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "There are more pylayer_grad ops than " "pylayer ops in the graph or program. The number of " "forward ops is %d and the number of backward ops is %d", @@ -119,14 +119,14 @@ static void PrepareSafeEagerDeletionOnPyLayerOpAndPyLayerGradOp( if (IsMatchedPyLayerOpAndPyLayerGradOp(fwd_op, bwd_op)) { PADDLE_ENFORCE_EQ(matched_fwd_op, nullptr, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Found multiple matched pylayer ops.")); matched_fwd_op = &fwd_op; } } PADDLE_ENFORCE_NOT_NULL(matched_fwd_op, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Cannot find matched forward pylayer op.")); SetSkipVarsForPyLayerOp(const_cast(matched_fwd_op), &bwd_op); diff --git a/paddle/fluid/operators/controlflow/recurrent_op_helper.cc b/paddle/fluid/operators/controlflow/recurrent_op_helper.cc index 2851757dccc4d..e290fa3e016bd 100644 --- a/paddle/fluid/operators/controlflow/recurrent_op_helper.cc +++ b/paddle/fluid/operators/controlflow/recurrent_op_helper.cc @@ -74,7 +74,7 @@ static void FindAllOpAndGradOp(const framework::ProgramDesc &program, PADDLE_ENFORCE_GE( ops.size(), grad_ops.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "There are more grad ops than forward ops in the graph or program, " "the number of ops is %d and the number of grad_ops is %d.", ops.size(), @@ -95,7 +95,7 @@ static void FindAllOpAndGradOp(const framework::ProgramDesc &program, PADDLE_ENFORCE_GE( ops.size(), grad_ops.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "There are more grad ops than forward ops in the graph or program, " "the number of ops is %d and the number of grad_ops is %d.", ops.size(), @@ -183,7 +183,7 @@ 
static void SetRecurrentOpAndRecurrentGradOpSkipVarAttr( PADDLE_ENFORCE_EQ( fwd_input.size(), in_grads.size(), - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Backward input gradient number does not match forward " "input number. The number of forward input number is %d and the " "number of backward input gradient number is %d.", @@ -203,7 +203,7 @@ static void SetRecurrentOpAndRecurrentGradOpSkipVarAttr( PADDLE_ENFORCE_EQ( fwd_param.size(), param_grads.size(), - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Backward parameter gradient number does not match " "forward parameter number. The number of forward parameter number is " "%d and the number of backward parameter gradient is %d.", @@ -269,15 +269,15 @@ void PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp( if (IsMatchedRecurrentOpAndRecurrentGradOp(fwd_op, bwd_op)) { PADDLE_ENFORCE_EQ(matched_fwd_op, nullptr, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Found multiple recurrent forward op matches " "recurrent grad op.")); matched_fwd_op = &fwd_op; } } - PADDLE_ENFORCE_NOT_NULL(matched_fwd_op, - platform::errors::PreconditionNotMet( - "Cannot find matched forward op.")); + PADDLE_ENFORCE_NOT_NULL( + matched_fwd_op, + phi::errors::PreconditionNotMet("Cannot find matched forward op.")); SetRecurrentOpAndRecurrentGradOpSkipVarAttr(*matched_fwd_op, bwd_op); recurrent_ops.erase(*matched_fwd_op); } diff --git a/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc b/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc index c04e897aa6366..52006166c8fc8 100644 --- a/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc +++ b/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc @@ -94,7 +94,7 @@ class WriteToArrayInferShape : public framework::InferShapeBase { PADDLE_ENFORCE_EQ( context->HasInput("I"), true, - platform::errors::NotFound("Input(I) of WriteToArrayOp is not 
found.")); + phi::errors::NotFound("Input(I) of WriteToArrayOp is not found.")); // TODO(wangchaochaohu) control flow Op do not support runtime infer shape // Later we add [ontext->GetInputDim("I")) == 1] check when it's supported @@ -103,10 +103,10 @@ class WriteToArrayInferShape : public framework::InferShapeBase { return; } - PADDLE_ENFORCE_EQ(context->HasOutput("Out"), - true, - platform::errors::NotFound( - "Output(Out) of WriteToArrayOp is not found.")); + PADDLE_ENFORCE_EQ( + context->HasOutput("Out"), + true, + phi::errors::NotFound("Output(Out) of WriteToArrayOp is not found.")); context->SetOutputDim("Out", context->GetInputDim("X")); // When compile time, we need to: @@ -148,15 +148,13 @@ class ReadFromArrayOp : public ArrayOp { void RunImpl(const framework::Scope &scope, const platform::Place &place) const override { auto *x = scope.FindVar(Input("X")); - PADDLE_ENFORCE_NOT_NULL(x, - platform::errors::NotFound( - "Input(X) of ReadFromArrayOp is not found.")); + PADDLE_ENFORCE_NOT_NULL( + x, phi::errors::NotFound("Input(X) of ReadFromArrayOp is not found.")); auto &x_array = x->Get(); auto *out = scope.FindVar(Output("Out")); PADDLE_ENFORCE_NOT_NULL( out, - platform::errors::NotFound( - "Output(Out) of ReadFromArrayOp is not found.")); + phi::errors::NotFound("Output(Out) of ReadFromArrayOp is not found.")); size_t offset = GetOffset(scope, place); if (offset < x_array.size()) { auto *out_tensor = out->GetMutable(); diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc index 5c758bbf7ff42..65f9145dbd89d 100644 --- a/paddle/fluid/operators/controlflow/while_op.cc +++ b/paddle/fluid/operators/controlflow/while_op.cc @@ -20,7 +20,7 @@ #include "paddle/fluid/operators/controlflow/while_op_helper.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif #include "paddle/fluid/platform/flags.h" @@ -64,15 +64,15 @@ class WhileOp : 
public framework::OperatorBase { private: void RunImpl(const framework::Scope &scope, const platform::Place &dev_place) const override { - PADDLE_ENFORCE_NOT_NULL(scope.FindVar(Input(kCondition)), - platform::errors::NotFound( - "Input(Condition) of WhileOp is not found.")); + PADDLE_ENFORCE_NOT_NULL( + scope.FindVar(Input(kCondition)), + phi::errors::NotFound("Input(Condition) of WhileOp is not found.")); auto &cond = scope.FindVar(Input(kCondition))->Get(); PADDLE_ENFORCE_EQ( cond.numel(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The numel of Input(Condition) of WhileOp must be 1. But now " "the Condition's numel is ", cond.numel(), @@ -136,7 +136,7 @@ class WhileOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ(step_scopes->size(), 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The Output(StepScope) of WhileOp should be empty.")); bool cond_data = GetCondData(cond); @@ -329,7 +329,7 @@ class WhileGradOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( Attr("is_test"), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "WhileGradOp is only callable when is_test is false.")); // get device context from pool platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); @@ -350,7 +350,7 @@ class WhileGradOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ(outside_og_names.size(), inside_og_names.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of original output gradient names " "does not match the number of backward input " "gradient names. 
The number of Backward input " @@ -397,7 +397,7 @@ class WhileGradOp : public framework::OperatorBase { !og_outside.GetMutable()->IsInitialized()) { auto *var_desc = parent_block->FindVarRecursive(outside_og_name); PADDLE_ENFORCE_NOT_NULL(var_desc, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Var `%s` is not found in parent " "block, can't fill constant.", outside_og_name)); @@ -448,7 +448,7 @@ class WhileGradOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( inside_array[j].numel(), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The numel of %d-th element of var %s (LoDTensorArray) " "in while block must be 0, but received its numel is %d.", j, @@ -457,7 +457,7 @@ class WhileGradOp : public framework::OperatorBase { } } } else { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Currently only support phi::DenseTensor and " "phi::DenseTensorArray in " "WhileGradOp.")); @@ -474,7 +474,7 @@ class WhileGradOp : public framework::OperatorBase { auto &p_names = Inputs(kX); PADDLE_ENFORCE_EQ(pg_ig_names.size(), p_names.size(), - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The number of names in Outputs(X@GRAD) does not " "match the number of names in Inputs(X). 
The " "number of names in Outputs(X@GRAD) is %d and " @@ -493,8 +493,8 @@ class WhileGradOp : public framework::OperatorBase { auto pg_ig_var = cur_scope.FindVar(inside_grad_name); PADDLE_ENFORCE_NOT_NULL( pg_ig_var, - platform::errors::NotFound("Variable %s is not found.", - inside_grad_name)); + phi::errors::NotFound("Variable %s is not found.", + inside_grad_name)); if (pg_ig_var->IsType()) { auto pg_ig_lod_t_arr = pg_ig_var->GetMutable(); @@ -531,13 +531,13 @@ class WhileGradOp : public framework::OperatorBase { auto *var = (*cur_scope_iter)->FindVar(inside_grad_name); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound("Variable %s is not found.", - inside_grad_name)); + phi::errors::NotFound("Variable %s is not found.", + inside_grad_name)); PADDLE_ENFORCE_EQ( var->IsType() || var->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Currently the type of var only can be LoDTensorArray, " "or phi::DenseTensor, but the received var[%s] is %s.", inside_grad_name, @@ -721,7 +721,7 @@ class WhileGradOpShapeInference : public framework::InferShapeBase { auto out_var_ptrs = ctx->GetOutputVarPtrs(kXGRAD); PADDLE_ENFORCE_EQ(in_var_ptrs.size(), out_var_ptrs.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of Inputs(X) must be the same as " "the size of Outputs(X@GRAD).")); diff --git a/paddle/fluid/operators/controlflow/while_op_helper.cc b/paddle/fluid/operators/controlflow/while_op_helper.cc index 80b4abe763123..638f2fbae740a 100644 --- a/paddle/fluid/operators/controlflow/while_op_helper.cc +++ b/paddle/fluid/operators/controlflow/while_op_helper.cc @@ -86,7 +86,7 @@ static void ModifyWhileOpAndWhileGradOpAttr(const OpVariant &fwd_op, PADDLE_ENFORCE_EQ( fwd_input.size(), in_grads.size(), - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Backward output gradient number does not match forward input number." 
"The number of forward input number is %d and the number of backward " "output gradient number is %d.", @@ -116,7 +116,7 @@ static void FindAllWhileAndWhileGradOp(const framework::ProgramDesc &program, PADDLE_ENFORCE_GE( while_ops->size(), while_grad_ops->size(), - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "There are more while_grad_ops than forward while_ops in the graph " "or program, the number of while_ops is %d and the number of " "while_grad_ops is %d.", @@ -137,7 +137,7 @@ static void FindAllWhileAndWhileGradOp(const framework::ProgramDesc &program, PADDLE_ENFORCE_GE( while_ops->size(), while_grad_ops->size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "There are more while_grad_ops than forward while_ops in the graph " "or program, the number of while_ops is %d and the number of " "while_grad_ops is %d.", @@ -167,14 +167,14 @@ static void PrepareSafeEagerDeletionOnWhileOpAndWhileGradOpImpl( if (IsMatchedWhileOpAndWhileGradOp(fwd_op, bwd_op)) { PADDLE_ENFORCE_EQ(matched_fwd_op, nullptr, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Found multiple while forward ops match while " "grad ops.")); matched_fwd_op = &fwd_op; } } PADDLE_ENFORCE_NOT_NULL(matched_fwd_op, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Cannot find matched forward while op.")); ModifyWhileOpAndWhileGradOpAttr(*matched_fwd_op, bwd_op); while_op_set.erase(*matched_fwd_op); @@ -231,7 +231,7 @@ bool GetCondData(const phi::DenseTensor &cond) { defined(PADDLE_WITH_XPU) || defined(PADDLE_WITH_CUSTOM_DEVICE) framework::TensorCopySync(cond, platform::CPUPlace(), cpu_cond.get()); #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "This version of PaddlePaddle does NOT support GPU/XPU but got " "GPU/XPU tensor Cond in WhileOp. 
Please compile WITH_GPU or " "WITH_XPU option.")); diff --git a/paddle/fluid/operators/copy_cross_scope_op.cc b/paddle/fluid/operators/copy_cross_scope_op.cc index ed433518068b4..45fccab591dca 100644 --- a/paddle/fluid/operators/copy_cross_scope_op.cc +++ b/paddle/fluid/operators/copy_cross_scope_op.cc @@ -51,7 +51,7 @@ class CopyCrossScopeOp : public framework::OperatorBase { bool ToM = Attr("to_main_scope"); PADDLE_ENFORCE_EQ(num_micro_scopes, num_micro_batches, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "For pipeline, number of micro scopes (%d) should " "be equal to number of micro batches (%d).", num_micro_scopes, @@ -60,7 +60,7 @@ class CopyCrossScopeOp : public framework::OperatorBase { auto* id_var = scope.FindVar(id_name); PADDLE_ENFORCE_NOT_NULL( id_var, - platform::errors::NotFound("No variable with name %s found.", id_name)); + phi::errors::NotFound("No variable with name %s found.", id_name)); auto id_tensor = id_var->GetMutable(); auto it = scope.kids().begin(); phi::DenseTensor cpu_id_tensor; @@ -77,12 +77,12 @@ class CopyCrossScopeOp : public framework::OperatorBase { auto* dst_var = dst_scope->FindVar(x_name); PADDLE_ENFORCE_NOT_NULL( dst_var, - platform::errors::NotFound( + phi::errors::NotFound( "No variable with name %s found in source scope.", x_name)); auto* main_var = scope.FindVar(x_name); PADDLE_ENFORCE_NOT_NULL( main_var, - platform::errors::NotFound( + phi::errors::NotFound( "No variable with name %s found in destination scope.", x_name)); auto dst_tensor = dst_var->GetMutable(); @@ -99,12 +99,12 @@ class CopyCrossScopeOp : public framework::OperatorBase { auto* source_var = source_scope->FindVar(x_name); PADDLE_ENFORCE_NOT_NULL( source_var, - platform::errors::NotFound( - "No variable with name %s found in source scope.", x_name)); + phi::errors::NotFound("No variable with name %s found in source scope.", + x_name)); auto* dst_var = dst_scope->FindVar(x_name); PADDLE_ENFORCE_NOT_NULL( dst_var, - 
platform::errors::NotFound( + phi::errors::NotFound( "No variable with name %s found in destination scope.", x_name)); auto src_tensor = source_var->GetMutable(); auto dst_tensor = dst_var->GetMutable(); @@ -115,7 +115,7 @@ class CopyCrossScopeOp : public framework::OperatorBase { auto* main_var = scope.FindVar(x_name); PADDLE_ENFORCE_NOT_NULL( main_var, - platform::errors::NotFound( + phi::errors::NotFound( "No variable with name %s found in destination scope.", x_name)); auto main_tensor = main_var->GetMutable(); paddle::framework::TensorCopySync( diff --git a/paddle/fluid/operators/correlation_op.cc b/paddle/fluid/operators/correlation_op.cc index 427f9a0307399..1243ae595bac4 100644 --- a/paddle/fluid/operators/correlation_op.cc +++ b/paddle/fluid/operators/correlation_op.cc @@ -84,19 +84,19 @@ class CorrelationOp : public framework::OperatorWithKernel { auto in_dims = ctx->GetInputDim("Input1"); auto in2_dims = ctx->GetInputDim("Input2"); - PADDLE_ENFORCE_EQ(in_dims.size() == 4, - true, - platform::errors::InvalidArgument( - "Input(X) of CorrelationOp must be 4 dims." - "But received dims is %d.", - in_dims.size())); - - PADDLE_ENFORCE_EQ(in2_dims.size() == 4, - true, - platform::errors::InvalidArgument( - "Input(Y) of CorrelationOp must be 4 dims." - "But received dims is %d.", - in2_dims.size())); + PADDLE_ENFORCE_EQ( + in_dims.size() == 4, + true, + phi::errors::InvalidArgument("Input(X) of CorrelationOp must be 4 dims." + "But received dims is %d.", + in_dims.size())); + + PADDLE_ENFORCE_EQ( + in2_dims.size() == 4, + true, + phi::errors::InvalidArgument("Input(Y) of CorrelationOp must be 4 dims." 
+ "But received dims is %d.", + in2_dims.size())); std::vector output_shape = CorrelationOutputSize(static_cast(in_dims[0]), static_cast(in_dims[2]), @@ -114,11 +114,11 @@ class CorrelationOp : public framework::OperatorWithKernel { const framework::ExecutionContext& ctx) const override { auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input1"); - PADDLE_ENFORCE_EQ(input_data_type, - framework::TransToProtoVarType( - ctx.Input("Input2")->dtype()), - platform::errors::InvalidArgument( - "X and Y shoule have the same datatype")); + PADDLE_ENFORCE_EQ( + input_data_type, + framework::TransToProtoVarType( + ctx.Input("Input2")->dtype()), + phi::errors::InvalidArgument("X and Y shoule have the same datatype")); return phi::KernelKey(input_data_type, ctx.GetPlace()); } }; @@ -173,7 +173,7 @@ class CorrelationKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::Unimplemented("Correlation only supports GPU now.")); + phi::errors::Unimplemented("Correlation only supports GPU now.")); } }; diff --git a/paddle/fluid/operators/correlation_op.cu b/paddle/fluid/operators/correlation_op.cu index ee6cc22c867c3..61b922e0caecc 100644 --- a/paddle/fluid/operators/correlation_op.cu +++ b/paddle/fluid/operators/correlation_op.cu @@ -179,10 +179,10 @@ template class CorrelationCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), - true, - platform::errors::InvalidArgument( - "Correlation only supports GPU now.")); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), + true, + phi::errors::InvalidArgument("Correlation only supports GPU now.")); auto *input1 = ctx.Input("Input1"); auto *input2 = ctx.Input("Input2"); @@ -447,10 +447,10 @@ template class CorrelationCUDAGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext 
&ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), - true, - platform::errors::InvalidArgument( - "Correlation only supports GPU now.")); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), + true, + phi::errors::InvalidArgument("Correlation only supports GPU now.")); const auto *input1 = ctx.Input("Input1"); const auto *input2 = ctx.Input("Input2"); const auto *grad_output = diff --git a/paddle/fluid/operators/crf_decoding_op.cc b/paddle/fluid/operators/crf_decoding_op.cc index e0cbcc513d6cd..62edb0ece83fc 100644 --- a/paddle/fluid/operators/crf_decoding_op.cc +++ b/paddle/fluid/operators/crf_decoding_op.cc @@ -103,7 +103,7 @@ class CRFDecodingOp : public framework::OperatorWithKernel { if (has_length) { PADDLE_ENFORCE_EQ(emission_dims.size(), 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Emission) should be a 3-D tensor. But " "received: input rank %u, input shape [%s]. ", emission_dims.size(), @@ -111,7 +111,7 @@ class CRFDecodingOp : public framework::OperatorWithKernel { } else { PADDLE_ENFORCE_EQ(emission_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Emission) should be a 2-D tensor. But " "received: input rank %u, input shape [%s].", emission_dims.size(), @@ -121,7 +121,7 @@ class CRFDecodingOp : public framework::OperatorWithKernel { auto transition_dims = ctx->GetInputDim("Transition"); PADDLE_ENFORCE_EQ(transition_dims.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Transition) should be a 2-D tensor. 
But " "received: input rank %u, input shape [%s].", transition_dims.size(), @@ -129,7 +129,7 @@ class CRFDecodingOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( transition_dims[0] - 2, transition_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "An invalid dimension for the Input(Transition), which should " "be a 2-D tensor with shape [(D + 2) x D]. But received: input " "rank %u, " @@ -140,7 +140,7 @@ class CRFDecodingOp : public framework::OperatorWithKernel { transition_dims[transition_dims.size() - 1] > 0)) { PADDLE_ENFORCE_EQ(emission_dims[emission_dims.size() - 1], transition_dims[transition_dims.size() - 1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last dimension of the Input(Emission) and the " "Input(Transition) " "should be equal to the tag number. But received " @@ -159,7 +159,7 @@ class CRFDecodingOp : public framework::OperatorWithKernel { (label_dims.size() == 3UL && label_dims[2] == 1) || label_dims.size() == 2UL, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Label) should be a 3-D tensor with last dimension " "fixed to 1 or a 2-D tensor in padding mode. But received: " "input " @@ -171,7 +171,7 @@ class CRFDecodingOp : public framework::OperatorWithKernel { (label_dims.size() == 2UL && label_dims[1] == 1) || label_dims.size() == 1UL, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Label) should be a 2-D tensor with last " "dimension fixed to 1 or a 1-D tensor. But received: " "input rank %u, input shape [%s].", @@ -182,7 +182,7 @@ class CRFDecodingOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( emission_dims[0], label_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(Emission) and Input(Label) " "should be the same. 
But received Input(Emission): rank %u, " "shape [%s]; received Input(Label): rank %u, shape [%s].", diff --git a/paddle/fluid/operators/crf_decoding_op.h b/paddle/fluid/operators/crf_decoding_op.h index 50d6eece098e3..6649043014d64 100644 --- a/paddle/fluid/operators/crf_decoding_op.h +++ b/paddle/fluid/operators/crf_decoding_op.h @@ -78,7 +78,7 @@ class CRFDecodingOpKernel : public framework::OpKernel { } else { PADDLE_ENFORCE_EQ(emission_weights->NumLevels(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Emission) should be a sequence with lod " "level 1. But received: lod level %u.", emission_weights->NumLevels())); @@ -86,7 +86,7 @@ class CRFDecodingOpKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( lod.size(), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Emission) must be a sequence. But received: lod level %u.", lod.size())); const size_t level = 0; @@ -105,7 +105,7 @@ class CRFDecodingOpKernel : public framework::OpKernel { if (label) { PADDLE_ENFORCE_EQ(label->NumLevels(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(label) should be a sequence with lod " "level 1. 
But received: lod level %u.", label->NumLevels())); diff --git a/paddle/fluid/operators/crop_op.cc b/paddle/fluid/operators/crop_op.cc index 19164959c7ceb..80db8230e9e24 100644 --- a/paddle/fluid/operators/crop_op.cc +++ b/paddle/fluid/operators/crop_op.cc @@ -34,7 +34,7 @@ class CropOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( int64_t(shape.size()), x_dim.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of elements (%d) of CropOp's " "'shape' attribute should be equal to the number of dimensions " "(%d) of the Input(X).", @@ -49,7 +49,7 @@ class CropOp : public framework::OperatorWithKernel { auto y_dim = ctx->GetInputDim("Y"); PADDLE_ENFORCE_EQ(common::arity(x_dim), common::arity(y_dim), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions (%d) of CropOp's input(X)" " must be equal to that (%d) of input(Y).", common::arity(x_dim), diff --git a/paddle/fluid/operators/crop_op.h b/paddle/fluid/operators/crop_op.h index 7d0d4f06392fa..04b077de36e50 100644 --- a/paddle/fluid/operators/crop_op.h +++ b/paddle/fluid/operators/crop_op.h @@ -18,7 +18,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" +#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/funcs/strided_memcpy.h" namespace paddle { @@ -36,25 +36,25 @@ static std::vector GetOffsets(const framework::ExecutionContext& ctx) { if (ctx.HasInput("Offsets")) { PADDLE_ENFORCE_EQ(ctx.Attr>("offsets").empty(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input 'Offsets' and attribute 'offsets' " "should not be used at the same time for CropOp.")); const auto* offsets_tensor = ctx.Input("Offsets"); PADDLE_ENFORCE_EQ(offsets_tensor->dims().size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions of input 'Offsets' for " "CropOp must be 1, but the value received is %d.", offsets_tensor->dims().size())); PADDLE_ENFORCE_EQ( rank, offsets_tensor->dims()[0], - platform::errors::InvalidArgument("The number of elements (%d) for " - "input 'Offsets' must be equal to " - "the number of dimensions (%d) " - "of the input tensor.", - offsets_tensor->dims()[0], - rank)); + phi::errors::InvalidArgument("The number of elements (%d) for " + "input 'Offsets' must be equal to " + "the number of dimensions (%d) " + "of the input tensor.", + offsets_tensor->dims()[0], + rank)); const int* offsets_data; phi::DenseTensor cpu_tmp_tensor; if (platform::is_cpu_place(offsets_tensor->place())) { @@ -70,12 +70,12 @@ static std::vector GetOffsets(const framework::ExecutionContext& ctx) { PADDLE_ENFORCE_EQ( rank, static_cast(res.size()), - platform::errors::InvalidArgument("The number of elements (%d) for " - "input 'Offsets' must be equal to " - "the number of dimensions (%d) " - "of the input tensor.", - res.size(), - rank)); + phi::errors::InvalidArgument("The number of elements (%d) for " + "input 'Offsets' must be equal to " + "the number of dimensions (%d) " + "of the 
input tensor.", + res.size(), + rank)); } return res; } @@ -101,7 +101,7 @@ void CropFunction(const framework::ExecutionContext& context) { } auto& place = *context.template device_context().eigen_device(); - EigenSlice, T, D>::Eval( + phi::funcs::EigenSlice, T, D>::Eval( place, out_tensor, x_tensor, e_offsets, e_shape); } @@ -113,14 +113,14 @@ class CropKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( rank, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions of the Input(X) for CropOp must be " "greater than or equal to 1, but the value received is %d.", rank)); PADDLE_ENFORCE_LE( rank, 6, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions of the Input(X) for CropOp must be " "less than or equal to 6, but the value received is %d.", rank)); @@ -165,7 +165,7 @@ void CropGradFunction(const framework::ExecutionContext& context) { auto d_out_tensor = EigenTensor::From(*d_out); auto& place = *context.template device_context().eigen_device(); - EigenPad, T, D>::Eval( + phi::funcs::EigenPad, T, D>::Eval( place, d_x_tensor, d_out_tensor, paddings, static_cast(0)); } } @@ -181,7 +181,7 @@ class CropGradKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( rank, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions of the input 'Out@GRAD' for " "CropGrad must be greater than or equal " "to 1, but the value received is %d.", @@ -189,7 +189,7 @@ class CropGradKernel : public framework::OpKernel { PADDLE_ENFORCE_LE( rank, 6, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions of the input 'Out@GRAD' for " "CropGrad must be less than or equal " "to 6, but the value received is %d.", diff --git a/paddle/fluid/operators/cross_entropy_op.cc b/paddle/fluid/operators/cross_entropy_op.cc index cc2b4b4252835..e8baeac3b0bfa 100644 --- a/paddle/fluid/operators/cross_entropy_op.cc +++ 
b/paddle/fluid/operators/cross_entropy_op.cc @@ -42,7 +42,7 @@ class CrossEntropyOpBase : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( common::slice_ddim(x_dims, 0, rank - 1), common::slice_ddim(label_dims, 0, rank - 1), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) and Input(Label) shall have the same shape " "except the last dimension. But received: the shape of Input(X) " "is " @@ -55,7 +55,7 @@ class CrossEntropyOpBase : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( rank, label_dims.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "If Attr(soft_label) == true, Input(X) and Input(Label) " "shall have the same dimensions. But received: the dimensions of " "Input(X) is [%d]," @@ -72,7 +72,7 @@ class CrossEntropyOpBase : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims[rank - 1], label_dims[rank - 1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "If Attr(soft_label) == true, the last dimension of " "Input(X) and Input(Label) should be equal. But received: the" "last dimension of Input(X) is [%d], the shape of Input(X) is " @@ -91,7 +91,7 @@ class CrossEntropyOpBase : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( label_dims[rank - 1], 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "the last dimension of Input(Label) should be 1." "But received: the last dimension of Input(Label) is [%d]," "the last dimension is [%d]", @@ -101,7 +101,7 @@ class CrossEntropyOpBase : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( rank, label_dims.size() + 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The rank of Input(X) should be equal to " "Input(Label) plus 1." 
"But received: The dimension of Input(X) is [%d], " @@ -160,7 +160,7 @@ class CrossEntropyGradientOpBase : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( dy_dims.size(), label_dims.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Y@Grad) and Input(Y) should have the same rank." "But received: Y@Grad's rank is [%d], Y's rank is [%d]", dy_dims.size(), @@ -175,7 +175,7 @@ class CrossEntropyGradientOpBase : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( common::slice_ddim(x_dims, 0, rank - 1), common::slice_ddim(dy_dims, 0, rank - 1), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(X) and Input(Y@Grad) should have the same " "shape except the last dimension. but received: " "the shape of Input(X) is [%s], " diff --git a/paddle/fluid/operators/cross_entropy_op.cu b/paddle/fluid/operators/cross_entropy_op.cu index 06ac7791e6d68..e4e2420d152bc 100644 --- a/paddle/fluid/operators/cross_entropy_op.cu +++ b/paddle/fluid/operators/cross_entropy_op.cu @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/cross_entropy_op.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace plat = paddle::platform; namespace ops = paddle::operators; @@ -24,14 +24,14 @@ PD_REGISTER_STRUCT_KERNEL(cross_entropy, ops::CrossEntropyOpKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} PD_REGISTER_STRUCT_KERNEL(cross_entropy_grad, GPU, ALL_LAYOUT, ops::CrossEntropyGradientOpKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} PD_REGISTER_STRUCT_KERNEL(cross_entropy2, GPU, @@ -39,11 +39,11 @@ PD_REGISTER_STRUCT_KERNEL(cross_entropy2, ops::CrossEntropyOpKernel2, float, double, - plat::float16) {} + phi::dtype::float16) {} PD_REGISTER_STRUCT_KERNEL(cross_entropy_grad2, GPU, ALL_LAYOUT, ops::CrossEntropyGradientOpKernel2, float, double, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/cross_entropy_op.h b/paddle/fluid/operators/cross_entropy_op.h index 5b76cc9a65a2b..9c0d025cb0cbb 100644 --- a/paddle/fluid/operators/cross_entropy_op.h +++ b/paddle/fluid/operators/cross_entropy_op.h @@ -180,7 +180,7 @@ struct HardLabelCrossEntropyForwardFunctor { auto label = label_[idx]; if (label != ignore_index_) { // don't update to PADDLE_ENFORCE_GE and PADDLE_ENFORCE_LT cause - // can't use platform::errors::InvalidArgument in HOSTDEVICE + // can't use phi::errors::InvalidArgument in HOSTDEVICE PADDLE_ENFORCE(label >= 0 && label < feature_size_, "Variable value (label) of " "OP(fluid.layers.cross_entropy) expected >= 0 " diff --git a/paddle/fluid/operators/ctc_align_op.cu b/paddle/fluid/operators/ctc_align_op.cu index 3b7490b1dcff3..76466ed12ab88 100644 --- a/paddle/fluid/operators/ctc_align_op.cu +++ b/paddle/fluid/operators/ctc_align_op.cu @@ -82,7 +82,7 @@ class CTCAlignOpCUDAKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, - 
platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "CTCAlign operator CUDA kernel must use CUDAPlace " "rather than CPUPlace.")); auto* input = ctx.Input("Input"); diff --git a/paddle/fluid/operators/ctc_align_op.h b/paddle/fluid/operators/ctc_align_op.h index faa2efab772a6..9ebfa7196ecc5 100644 --- a/paddle/fluid/operators/ctc_align_op.h +++ b/paddle/fluid/operators/ctc_align_op.h @@ -72,7 +72,7 @@ class CTCAlignKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( input_dims[0], static_cast(input_lod[level].back()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension %d of CTCAlign operator Input(Input) should " "be equal to " "the sum of all sequences' lengths %d.", diff --git a/paddle/fluid/operators/cudnn_rnn_cache.h b/paddle/fluid/operators/cudnn_rnn_cache.h index 9b6774af5832a..eaca6842d350c 100644 --- a/paddle/fluid/operators/cudnn_rnn_cache.h +++ b/paddle/fluid/operators/cudnn_rnn_cache.h @@ -267,7 +267,7 @@ class CudnnRNNCache { PADDLE_ENFORCE_EQ( weights_size_, cudnn_size * weight_numel, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The cudnn lstm and setting weight size should be same.")); int dim_w[3]; diff --git a/paddle/fluid/operators/custom_device_common_op_registry.cc b/paddle/fluid/operators/custom_device_common_op_registry.cc index d63197af754f2..d45a1a5a6a675 100644 --- a/paddle/fluid/operators/custom_device_common_op_registry.cc +++ b/paddle/fluid/operators/custom_device_common_op_registry.cc @@ -65,19 +65,19 @@ class CConcatOpCustomDeviceKernel : public framework::OpKernel { PADDLE_ENFORCE_GE(rank, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of rank (%d) for c_concat must be " "greater than or equal to 0.", rank)); PADDLE_ENFORCE_GE(nranks, 2, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of nranks (%d) for c_concat must be " "greater than or equal to 2.", nranks)); 
PADDLE_ENFORCE_LT(rank, nranks, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of rank (%d) for c_concat must be " "less than that of nranks (%d).", rank, @@ -107,7 +107,7 @@ class CConcatOpCustomDeviceKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( nranks, comm->GetSize(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "nranks: %s should equal to %s", nranks, comm->GetSize())); int64_t send_numel = x->numel(); @@ -160,7 +160,7 @@ class CIdentityOpCustomDeviceKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( rid, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for c_identity op must be non-negative.", rid)); ctx.device_context().Alloc(out); @@ -180,19 +180,19 @@ class CSplitOpCustomDeviceKernel : public framework::OpKernel { PADDLE_ENFORCE_GE(rank, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of rank (%d) for c_split must be " "greater than or equal to 0.", rank)); PADDLE_ENFORCE_GE(nranks, 2, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of nranks (%d) for c_split must be " "greater than or equal to 2.", nranks)); PADDLE_ENFORCE_LT(rank, nranks, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of rank (%d) for c_split must be " "less than that of nranks (%d).", rank, @@ -259,7 +259,7 @@ class CEmbeddingOpCustomDeviceKernel : public framework::OpKernel { *reinterpret_cast(out_tensor.impl().get())) .Resize(out_dims); } else { - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "CustomDevice c_embedding ids only support int32 or int64.")); } } @@ -319,7 +319,7 @@ class CEmbeddingGradOpCustomDeviceKernel : public framework::OpKernel { table_grad_t->ShareDataWith( *reinterpret_cast(table_grad_tensor.impl().get())); } else { - PADDLE_THROW(platform::errors::Unavailable( + 
PADDLE_THROW(phi::errors::Unavailable( "CustomDevice c_embedding ids only support int32 or int64.")); } } @@ -543,11 +543,11 @@ class CAllReduceOpCustomDeviceKernel : public framework::OpKernel { auto place = cond->place(); PADDLE_ENFORCE_EQ(platform::is_cpu_place(place), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The input `cond` tensor should be on cpu place")); PADDLE_ENFORCE_EQ(cond->numel(), 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The input `cond` should be shape [1]")); if (!cond->data()[0]) { VLOG(4) << "Skip all reduce Op since cond is 0"; @@ -594,8 +594,8 @@ class CAllReduceOpCustomDeviceKernel : public framework::OpKernel { break; default: - PADDLE_THROW(paddle::platform::errors::InvalidArgument( - "Invalid reduce type: %d", red_type)); + PADDLE_THROW(phi::errors::InvalidArgument("Invalid reduce type: %d", + red_type)); } auto task = pg->AllReduce(in_tensor, out_tensor, opts); @@ -910,14 +910,14 @@ class GlobalScatterOpCustomDeviceKernel : public framework::OpKernel { const auto& dev_ctx = ctx.template device_context(); auto place = ctx.GetPlace(); - PADDLE_ENFORCE_EQ(local_count->dtype(), - phi::DataType::INT64, - platform::errors::InvalidArgument( - "Please use int64 type in local_count.")); - PADDLE_ENFORCE_EQ(global_count->dtype(), - phi::DataType::INT64, - platform::errors::InvalidArgument( - "Please use int64 type in global_count.")); + PADDLE_ENFORCE_EQ( + local_count->dtype(), + phi::DataType::INT64, + phi::errors::InvalidArgument("Please use int64 type in local_count.")); + PADDLE_ENFORCE_EQ( + global_count->dtype(), + phi::DataType::INT64, + phi::errors::InvalidArgument("Please use int64 type in global_count.")); auto map = distributed::ProcessGroupMapFromGid::getInstance(); const int64_t* cpu_local_count_data; @@ -1124,14 +1124,14 @@ class GlobalGatherOpCustomDeviceKernel : public framework::OpKernel { auto place = ctx.GetPlace(); auto out = ctx.Output("Out"); - 
PADDLE_ENFORCE_EQ(local_count->dtype(), - phi::DataType::INT64, - platform::errors::InvalidArgument( - "Please use int64 type in local_count.")); - PADDLE_ENFORCE_EQ(global_count->dtype(), - phi::DataType::INT64, - platform::errors::InvalidArgument( - "Please use int64 type in global_count.")); + PADDLE_ENFORCE_EQ( + local_count->dtype(), + phi::DataType::INT64, + phi::errors::InvalidArgument("Please use int64 type in local_count.")); + PADDLE_ENFORCE_EQ( + global_count->dtype(), + phi::DataType::INT64, + phi::errors::InvalidArgument("Please use int64 type in global_count.")); const int64_t* cpu_local_count_data; const int64_t* cpu_global_count_data; @@ -1370,7 +1370,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { float>, paddle::operators::CConcatOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16>); + phi::dtype::float16>); REGISTER_OP_CUSTOM_DEVICE_KERNEL( c_split, device_type, @@ -1382,7 +1382,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { int>, paddle::operators::CSplitOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16>); + phi::dtype::float16>); REGISTER_OP_CUSTOM_DEVICE_KERNEL( c_embedding, device_type, @@ -1391,7 +1391,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { float>, paddle::operators::CEmbeddingOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16>); + phi::dtype::float16>); REGISTER_OP_CUSTOM_DEVICE_KERNEL( c_embedding_grad, device_type, @@ -1400,7 +1400,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { float>, paddle::operators::CEmbeddingGradOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16>); + phi::dtype::float16>); REGISTER_OP_CUSTOM_DEVICE_KERNEL( c_softmax_with_cross_entropy, @@ -1413,7 +1413,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { double>, 
paddle::operators::CSoftmaxWithCrossEntropyOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16>) {} + phi::dtype::float16>) {} REGISTER_OP_CUSTOM_DEVICE_KERNEL( c_softmax_with_cross_entropy_grad, @@ -1426,7 +1426,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { double>, paddle::operators::CSoftmaxWithCrossEntropyGradCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16>) {} + phi::dtype::float16>) {} REGISTER_OP_CUSTOM_DEVICE_KERNEL( c_identity, @@ -1445,7 +1445,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { int64_t>, paddle::operators::CIdentityOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16>) {} + phi::dtype::float16>) {} REGISTER_OP_CUSTOM_DEVICE_KERNEL( c_sync_calc_stream, @@ -1467,7 +1467,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { double>, paddle::operators::CSyncCalcStreamCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16>) {} + phi::dtype::float16>) {} REGISTER_OP_CUSTOM_DEVICE_KERNEL( c_allreduce_sum, device_type, @@ -1481,7 +1481,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { phi::ccl::CCLReduceOp::SUM>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16, + phi::dtype::float16, phi::ccl::CCLReduceOp::SUM>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, @@ -1504,7 +1504,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { phi::ccl::CCLReduceOp::SUM>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16, + phi::dtype::float16, phi::ccl::CCLReduceOp::SUM>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, @@ -1527,7 +1527,7 @@ void RegisterCustomDeviceCommonKernel(const 
std::string& dev_type) { phi::ccl::CCLReduceOp::MIN>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16, + phi::dtype::float16, phi::ccl::CCLReduceOp::MIN>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, @@ -1550,7 +1550,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { phi::ccl::CCLReduceOp::MAX>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16, + phi::dtype::float16, phi::ccl::CCLReduceOp::MAX>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, @@ -1573,7 +1573,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { phi::ccl::CCLReduceOp::PRODUCT>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16, + phi::dtype::float16, phi::ccl::CCLReduceOp::PRODUCT>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, @@ -1590,8 +1590,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { paddle::operators::CBroadcastOpCustomDeviceKernel, paddle::operators::CBroadcastOpCustomDeviceKernel, paddle::operators::CBroadcastOpCustomDeviceKernel, - paddle::operators::CBroadcastOpCustomDeviceKernel< - paddle::platform::float16>) {} + paddle::operators::CBroadcastOpCustomDeviceKernel) {} REGISTER_OP_CUSTOM_DEVICE_KERNEL( barrier, device_type, @@ -1614,7 +1613,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { paddle::operators::RandomRoutingOpCustomDeviceKernel, paddle::operators::RandomRoutingOpCustomDeviceKernel, paddle::operators::RandomRoutingOpCustomDeviceKernel< - paddle::platform::float16>) {} + phi::dtype::float16>) {} REGISTER_OP_CUSTOM_DEVICE_KERNEL( assign_pos, device_type, @@ -1628,7 +1627,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { 
paddle::operators::GlobalScatterOpCustomDeviceKernel, paddle::operators::GlobalScatterOpCustomDeviceKernel, paddle::operators::GlobalScatterOpCustomDeviceKernel< - paddle::platform::float16>) {} + phi::dtype::float16>) {} REGISTER_OP_CUSTOM_DEVICE_KERNEL( global_gather, device_type, @@ -1637,7 +1636,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { paddle::operators::GlobalGatherOpCustomDeviceKernel, paddle::operators::GlobalGatherOpCustomDeviceKernel, paddle::operators::GlobalGatherOpCustomDeviceKernel< - paddle::platform::float16>) {} + phi::dtype::float16>) {} #endif } diff --git a/paddle/fluid/operators/cvm_op.cc b/paddle/fluid/operators/cvm_op.cc index 1e414ff217c2f..a305263338769 100644 --- a/paddle/fluid/operators/cvm_op.cc +++ b/paddle/fluid/operators/cvm_op.cc @@ -33,8 +33,8 @@ class CVMOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 2UL, - platform::errors::InvalidArgument( - "Input(X)'s rank should be 2, but got %d", x_dims.size())); + phi::errors::InvalidArgument("Input(X)'s rank should be 2, but got %d", + x_dims.size())); if (ctx->Attrs().Get("use_cvm")) { ctx->SetOutputDim("Y", {x_dims[0], x_dims[1]}); @@ -77,23 +77,23 @@ class CVMGradientOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 2, - platform::errors::InvalidArgument( - "Expect Input(X)'s rank == 2, but got %d", x_dims.size())); + phi::errors::InvalidArgument("Expect Input(X)'s rank == 2, but got %d", + x_dims.size())); PADDLE_ENFORCE_EQ( dy_dims.size(), 2, - platform::errors::InvalidArgument( - "Expect Input(X)'s rank == 2, but got %d", dy_dims.size())); + phi::errors::InvalidArgument("Expect Input(X)'s rank == 2, but got %d", + dy_dims.size())); PADDLE_ENFORCE_EQ( cvm_dims.size(), 2, - platform::errors::InvalidArgument( - "Expect Input(X)'s rank == 2, but got %d", cvm_dims.size())); + phi::errors::InvalidArgument("Expect Input(X)'s rank == 2, but got %d", + cvm_dims.size())); PADDLE_ENFORCE_EQ( x_dims[0], 
dy_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 1st dimension of Input(X) and Input(Y@Grad) should " "be equal, X is %d, Y@Grad is %d", x_dims[0], @@ -102,7 +102,7 @@ class CVMGradientOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( cvm_dims[1], 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "When Attr(soft_label) == false, the 2nd dimension of " "Input(CVM) should be 2, but got %d cvm_dims[1]")); ctx->SetOutputDim(framework::GradVarName("X"), x_dims); diff --git a/paddle/fluid/operators/cvm_op.cu b/paddle/fluid/operators/cvm_op.cu index 1fbce90e494a0..5e127a532267b 100644 --- a/paddle/fluid/operators/cvm_op.cu +++ b/paddle/fluid/operators/cvm_op.cu @@ -110,7 +110,7 @@ class CVMCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( batch_size, lod[lod.size() - 1], - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Input(X)'s dim[0] must be equal to last element of lod")); CvmComputeKernel<<<(numel + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS, @@ -164,7 +164,7 @@ class CVMGradCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( batch_size, lod[lod.size() - 1], - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Output(X@GRAD)'s dim[0] must be equal to last element of lod")); phi::MixVector mixv_lod(&lod); CvmGradComputeKernel<<<(dx_numel + PADDLE_CUDA_NUM_THREADS - 1) / diff --git a/paddle/fluid/operators/data_norm_op.cc b/paddle/fluid/operators/data_norm_op.cc index cc3a224a7e862..750310547306d 100644 --- a/paddle/fluid/operators/data_norm_op.cc +++ b/paddle/fluid/operators/data_norm_op.cc @@ -57,11 +57,11 @@ class DataNormOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("scale_w"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(scale_w) of DataNormOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasInput("bias"), true, - 
platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(bias) of DataNormOp should not be null.")); } @@ -69,39 +69,39 @@ class DataNormOp : public framework::OperatorWithKernel { const DataLayout data_layout = common::StringToDataLayout( ctx->Attrs().Get("data_layout")); - PADDLE_ENFORCE_EQ(x_dims.size() >= 2 && x_dims.size() <= 5, - true, - platform::errors::InvalidArgument( - "Input X must have 2 to 5 dimensions.")); + PADDLE_ENFORCE_EQ( + x_dims.size() >= 2 && x_dims.size() <= 5, + true, + phi::errors::InvalidArgument("Input X must have 2 to 5 dimensions.")); const int64_t C = (data_layout == DataLayout::kNCHW ? x_dims[1] : x_dims[x_dims.size() - 1]); - PADDLE_ENFORCE_EQ(ctx->GetInputDim("BatchSize").size(), - 1UL, - platform::errors::InvalidArgument( - "The input dim of BatchSize should be 1")); - PADDLE_ENFORCE_EQ(ctx->GetInputDim("BatchSum").size(), - 1UL, - platform::errors::InvalidArgument( - "The input dim of BatchSum should be 1")); + PADDLE_ENFORCE_EQ( + ctx->GetInputDim("BatchSize").size(), + 1UL, + phi::errors::InvalidArgument("The input dim of BatchSize should be 1")); + PADDLE_ENFORCE_EQ( + ctx->GetInputDim("BatchSum").size(), + 1UL, + phi::errors::InvalidArgument("The input dim of BatchSum should be 1")); PADDLE_ENFORCE_EQ(ctx->GetInputDim("BatchSquareSum").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input dim of BatchSquareSum should be 1")); if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ(ctx->GetInputDim("BatchSize")[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input dim[0] of BatchSize should be C")); PADDLE_ENFORCE_EQ(ctx->GetInputDim("BatchSum")[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input dim[0] of BatchSum should be C")); PADDLE_ENFORCE_EQ(ctx->GetInputDim("BatchSquareSum")[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input dim[0] of BatchSquareSum should be C")); } @@ 
-112,21 +112,21 @@ class DataNormOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( scale_dim.size(), 1UL, - platform::errors::InvalidArgument("the dimension of scale" - "must equal to 1. But received: " - "the shape of scale is [%s], " - "the dimension of scale is [%d]", - scale_dim, - scale_dim.size())); + phi::errors::InvalidArgument("the dimension of scale" + "must equal to 1. But received: " + "the shape of scale is [%s], " + "the dimension of scale is [%d]", + scale_dim, + scale_dim.size())); PADDLE_ENFORCE_EQ( bias_dim.size(), 1UL, - platform::errors::InvalidArgument("the dimension of bias" - "must equal to 1. But received: " - "the shape of bias is [%s]," - "the dimension of bias is [%d]", - bias_dim, - bias_dim.size())); + phi::errors::InvalidArgument("the dimension of bias" + "must equal to 1. But received: " + "the shape of bias is [%s]," + "the dimension of bias is [%d]", + bias_dim, + bias_dim.size())); bool check = true; if ((!ctx->IsRuntime()) && @@ -137,14 +137,14 @@ class DataNormOp : public framework::OperatorWithKernel { if (check) { PADDLE_ENFORCE_EQ(scale_dim[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "the shape of scale must equal to [%d]" "But received: the shape of scale is [%d]", C, scale_dim[0])); PADDLE_ENFORCE_EQ(bias_dim[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "the shape of bias must equal to [%d]" "But received: the shape of bias is [%d]", C, @@ -171,28 +171,28 @@ class DataNormOp : public framework::OperatorWithKernel { } PADDLE_ENFORCE_EQ(dn_param_type, OperatorWithKernel::IndicateVarDataType(ctx, "BatchSize"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "BatchSize input should be of float type")); - PADDLE_ENFORCE_EQ(dn_param_type, - OperatorWithKernel::IndicateVarDataType(ctx, "BatchSum"), - platform::errors::InvalidArgument( - "BatchSum input should be of float type")); + PADDLE_ENFORCE_EQ( + dn_param_type, + 
OperatorWithKernel::IndicateVarDataType(ctx, "BatchSum"), + phi::errors::InvalidArgument("BatchSum input should be of float type")); PADDLE_ENFORCE_EQ( dn_param_type, OperatorWithKernel::IndicateVarDataType(ctx, "BatchSquareSum"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "BatchSquareSum input should be of float type")); bool enable_scale_and_shift = ctx.Attr("enable_scale_and_shift"); if (enable_scale_and_shift) { PADDLE_ENFORCE_EQ(dn_param_type, OperatorWithKernel::IndicateVarDataType(ctx, "scale_w"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "scale_w input should be of float type")); - PADDLE_ENFORCE_EQ(dn_param_type, - OperatorWithKernel::IndicateVarDataType(ctx, "bias"), - platform::errors::InvalidArgument( - "bias input should be of float type")); + PADDLE_ENFORCE_EQ( + dn_param_type, + OperatorWithKernel::IndicateVarDataType(ctx, "bias"), + phi::errors::InvalidArgument("bias input should be of float type")); } return phi::KernelKey(input_data_type, ctx.GetPlace()); @@ -208,7 +208,7 @@ class DataNormOpMaker : public framework::OpProtoAndCheckerMaker { .AddCustomChecker([](const float &epsilon) { PADDLE_ENFORCE_EQ(epsilon >= 0.0f && epsilon <= 0.001f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'epsilon' should be between 0.0 and 0.001.")); }); AddAttr("slot_dim", @@ -279,7 +279,7 @@ class DataNormKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 2, - platform::errors::InvalidArgument("The Input dim size should be 2")); + phi::errors::InvalidArgument("The Input dim size should be 2")); const int N = static_cast(x_dims[0]); const int C = static_cast(data_layout == DataLayout::kNCHW ? 
x_dims[1] @@ -287,11 +287,11 @@ class DataNormKernel : public framework::OpKernel { PADDLE_ENFORCE_LT(0, N, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dims of Input(X) should be greater than 0.")); PADDLE_ENFORCE_LT(0, C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dims of Input(X) should be greater than 0.")); auto *y = ctx.Output("Y"); @@ -401,7 +401,7 @@ class DataNormKernel : public framework::OpKernel { break; } default: - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Unknown storage order: %d, please use NCHW or NHWC", data_layout)); } } @@ -421,17 +421,17 @@ class DataNormGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasOutput("BatchSize"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(BatchSize) of DataNormGradOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("BatchSum"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(BatchSum) of DataNormGradOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("BatchSquareSum"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(BatchSquareSum) of DataNormGradOp should not be null.")); OP_INOUT_CHECK(ctx->HasInput("Means"), "Input", "Means", "DataNormGrad"); OP_INOUT_CHECK(ctx->HasInput("Scales"), "Input", "Scales", "DataNormGrad"); @@ -471,7 +471,7 @@ class DataNormGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ((has_scale_grad == has_bias_grad), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Scale@GRAD) and Output(Bias@GRAD)" "must be null or not be null at same time. 
" "But now, has Scale@Grad=[%d], has Bias@GRAD=[%d]", @@ -489,7 +489,7 @@ class DataNormGradOp : public framework::OperatorWithKernel { const framework::ExecutionContext &ctx) const override { const auto *var = ctx.InputVar(framework::GradVarName("Y")); if (var == nullptr) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Y@GRAD can not be found for computation")); } const phi::DenseTensor *t = nullptr; @@ -497,7 +497,7 @@ class DataNormGradOp : public framework::OperatorWithKernel { t = &var->Get(); } if (t == nullptr) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Y@GRAD can not be found for computation")); } @@ -524,7 +524,7 @@ class DataNormGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 2, - platform::errors::InvalidArgument("The Input dim size should be 2")); + phi::errors::InvalidArgument("The Input dim size should be 2")); const int N = static_cast(x_dims[0]); const int C = static_cast(data_layout == DataLayout::kNCHW ? 
x_dims[1] @@ -710,7 +710,7 @@ class DataNormGradKernel : public framework::OpKernel { break; } default: - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Unknown storage order: %s, please use NCHW or NHWC", data_layout_str)); } diff --git a/paddle/fluid/operators/data_norm_op.cu b/paddle/fluid/operators/data_norm_op.cu index 33cd6a8e6e49c..4be27b671d8a5 100644 --- a/paddle/fluid/operators/data_norm_op.cu +++ b/paddle/fluid/operators/data_norm_op.cu @@ -115,17 +115,17 @@ class DataNormKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 2, - platform::errors::PreconditionNotMet("The Input dim size should be 2")); + phi::errors::PreconditionNotMet("The Input dim size should be 2")); const int N = x_dims[0]; const int C = x_dims[1]; PADDLE_ENFORCE_LT(0, N, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dims of Input(X) should be greater than 0.")); PADDLE_ENFORCE_LT(0, C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dims of Input(X) should be greater than 0.")); const T *batch_size_in = @@ -174,7 +174,7 @@ class DataNormGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 2, - platform::errors::PreconditionNotMet("The Input dim size should be 2")); + phi::errors::PreconditionNotMet("The Input dim size should be 2")); const int N = x_dims[0]; const int C = x_dims[1]; @@ -226,7 +226,7 @@ class DataNormGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -238,7 +238,7 @@ class DataNormGradKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); } else { @@ -305,7 +305,7 @@ class DataNormGradKernel : public framework::OpKernel { } platform::GpuStreamSync(stream); #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU, and need_sync_stats connot be " "supported on windows now.")); #endif diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.cc b/paddle/fluid/operators/deformable_psroi_pooling_op.cc index 5b339cf96c2b1..1b6ed2ba0be62 100644 --- a/paddle/fluid/operators/deformable_psroi_pooling_op.cc +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.cc @@ -148,7 +148,7 @@ class DeformablePSROIPoolOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( rois_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(ROIs) should be a 2-D phi::DenseTensor of shape (num_rois, " "4) " "given as [[ x1, y1, x2, y2], ...]. 
The rank of Input(ROIs) should " @@ -158,12 +158,12 @@ class DeformablePSROIPoolOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( trans_dims.size(), 4, - platform::errors::InvalidArgument("The rank of Input(Trans) should be " - "4 and the shape of Trans should be " - "(N, 2, H, W), but received Trans " - "rank is:%d and Trans shape is:[%s].", - trans_dims.size(), - trans_dims)); + phi::errors::InvalidArgument("The rank of Input(Trans) should be " + "4 and the shape of Trans should be " + "(N, 2, H, W), but received Trans " + "rank is:%d and Trans shape is:[%s].", + trans_dims.size(), + trans_dims)); auto pooled_height = ctx->Attrs().Get("pooled_height"); auto pooled_width = ctx->Attrs().Get("pooled_width"); auto spatial_scale = ctx->Attrs().Get("spatial_scale"); @@ -176,17 +176,17 @@ class DeformablePSROIPoolOp : public framework::OperatorWithKernel { auto part_width = part_size[1]; auto sample_per_part = ctx->Attrs().Get("sample_per_part"); auto trans_std = ctx->Attrs().Get("trans_std"); - PADDLE_ENFORCE_GE(trans_std, - 0., - platform::errors::InvalidArgument( - "Input(trans_std) should not be lower " - "than 0.0, but received trans_std " - "is:%f", - trans_std)); + PADDLE_ENFORCE_GE( + trans_std, + 0., + phi::errors::InvalidArgument("Input(trans_std) should not be lower " + "than 0.0, but received trans_std " + "is:%f", + trans_std)); PADDLE_ENFORCE_GE( input_dims[1], output_channels, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The channel of Input(Input) should not be lower than " "Input(output_dim), " "but received Input channel is:%d and output_dim is:%d.", @@ -195,70 +195,70 @@ class DeformablePSROIPoolOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( pooled_height, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(pooled_height) should be greater than 0, but received " "pooled_height is:%d.", pooled_height)); PADDLE_ENFORCE_GT( pooled_width, 0, - 
platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(pooled_width) should be greater than 0, but received " "pooled_width is:%d.", pooled_width)); PADDLE_ENFORCE_GT( spatial_scale, 0., - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(spatial_scale) should be greater than 0., but received " "spatial_scale is:%f.", spatial_scale)); PADDLE_ENFORCE_EQ( group_size.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The length of Input(group_size) should be 2, but received " "group_size length is:%d.", group_size.size())); PADDLE_ENFORCE_GT( group_height, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "group_height in Input(group_size) should be greater than 0, " "but received group_height is:%d.", group_height)); PADDLE_ENFORCE_GT( group_width, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "group_width in Input(group_size) should be greater than 0 " "but received group_width is:%d.", group_width)); PADDLE_ENFORCE_EQ( part_size.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The length of Input(part_size) should be 2, but received " "part_size length is:%d.", part_size.size())); PADDLE_ENFORCE_GT( part_height, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "part_height in Input(part_size) should be greater than 0 " "but received part_height is:%d.", part_height)); PADDLE_ENFORCE_GT( part_width, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "part_width in Input(part_size) should be greater than 0 " "but received part_width is:%d.", part_width)); PADDLE_ENFORCE_LE( part_height, trans_dims[2], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "part_height in Input(part_size) should not be greater than " "the height of Input(Trans), but received part_height is:%d, " "the height of Input(Trans) is:%d.", @@ -267,7 +267,7 @@ class 
DeformablePSROIPoolOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LE( part_width, trans_dims[3], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "part_width in Input(part_size) should not be greater than " "the width of Input(Trans), but received part_width is:%d, " "the width of Input(Trans) is:%d.", @@ -276,7 +276,7 @@ class DeformablePSROIPoolOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( sample_per_part, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(sample_per_part) should be greater than 0, but received " "sample_per_part is:%d.", sample_per_part)); diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.cu b/paddle/fluid/operators/deformable_psroi_pooling_op.cu index a3f045fd50a5f..1dfc02943b7fb 100644 --- a/paddle/fluid/operators/deformable_psroi_pooling_op.cu +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.cu @@ -213,7 +213,7 @@ class DeformablePSROIPoolCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( num_rois, out->dims()[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of Input(ROIs) should be same with the number of " "Output(Output), but received ROIs number is:%d, Output number " "is:%d.", @@ -225,7 +225,7 @@ class DeformablePSROIPoolCUDAKernel : public framework::OpKernel { no_trans ? 
output_dim : output_dim / num_classes; PADDLE_ENFORCE_GE(channels_each_class, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "channels_each_class should not be lower than 1, but " "channels_each_class is:%d.", channels_each_class)); @@ -243,7 +243,7 @@ class DeformablePSROIPoolCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( rois_batch_size, batch, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "rois_batch_size should be equal to the batch_size, but " "rois_batch_size is:%d, batch_size is:%d.", rois_batch_size, @@ -251,7 +251,7 @@ class DeformablePSROIPoolCUDAKernel : public framework::OpKernel { int rois_num_with_lod = rois_lod[rois_batch_size]; PADDLE_ENFORCE_EQ(num_rois, rois_num_with_lod, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rois_num from input and lod must be same, but" "rois_num from input is:%d, rois_num from lod is:%d.", num_rois, @@ -555,7 +555,7 @@ class DeformablePSROIPoolGradCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( rois_batch_size, batch, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "rois_batch_size should be equal to the batch_size, but " "rois_batch_size is:%d, batch_size is:%d.", rois_batch_size, @@ -564,7 +564,7 @@ class DeformablePSROIPoolGradCUDAKernel : public framework::OpKernel { int rois_num_with_lod = rois_lod[rois_batch_size]; PADDLE_ENFORCE_EQ(num_rois, rois_num_with_lod, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rois_num from input and lod must be same, but" "rois_num from input is:%d, rois_num from lod is:%d.", num_rois, diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.h b/paddle/fluid/operators/deformable_psroi_pooling_op.h index 1ff1c83206f50..417e2da3468aa 100644 --- a/paddle/fluid/operators/deformable_psroi_pooling_op.h +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.h @@ -187,7 +187,7 @@ class DeformablePSROIPoolCPUKernel : public 
framework::OpKernel { PADDLE_ENFORCE_EQ( num_rois, out->dims()[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of Input(ROIs) should be same with the number of " "Output(Output), but received ROIs number is:%d, Output number " "is:%d.", @@ -221,7 +221,7 @@ class DeformablePSROIPoolCPUKernel : public framework::OpKernel { auto channels_each_class = no_trans ? output_dim : output_dim / num_classes; PADDLE_ENFORCE_GE(channels_each_class, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "channels_each_class should not be lower than 1, but " "channels_each_class is:%d.", channels_each_class)); @@ -238,7 +238,7 @@ class DeformablePSROIPoolCPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( rois_batch_size, batch, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "rois_batch_size should be equal to the batch_size, but " "rois_batch_size is:%d, batch_size is:%d.", rois_batch_size, @@ -246,7 +246,7 @@ class DeformablePSROIPoolCPUKernel : public framework::OpKernel { int rois_num_with_lod = rois_lod[rois_batch_size]; PADDLE_ENFORCE_EQ(num_rois, rois_num_with_lod, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rois_num from input and lod must be same, but" "rois_num from input is:%d, rois_num from lod is:%d.", num_rois, @@ -542,7 +542,7 @@ class DeformablePSROIPoolGradCPUKernel : public framework::OpKernel { int rois_num_with_lod = rois_lod[rois_batch_size]; PADDLE_ENFORCE_EQ(num_rois, rois_num_with_lod, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rois_num from input and lod must be same, but" "rois_num from input is:%d, rois_num from lod is:%d.", num_rois, diff --git a/paddle/fluid/operators/dequantize_log_op.cc b/paddle/fluid/operators/dequantize_log_op.cc index 03ede45695148..7526bdb49eafd 100644 --- a/paddle/fluid/operators/dequantize_log_op.cc +++ b/paddle/fluid/operators/dequantize_log_op.cc @@ -62,14 +62,14 @@ class 
DequantizeLogOp : public framework::OperatorWithKernel { : OperatorWithKernel(type, inputs, outputs, attrs) {} void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - platform::errors::NotFound( - "Input(X) of DequantizeLogOp is not found.")); - PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), - true, - platform::errors::NotFound( - "Output(Out) of DequantizeLogOp is not found.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("X"), + true, + phi::errors::NotFound("Input(X) of DequantizeLogOp is not found.")); + PADDLE_ENFORCE_EQ( + ctx->HasOutput("Out"), + true, + phi::errors::NotFound("Output(Out) of DequantizeLogOp is not found.")); ctx->ShareDim("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out"); diff --git a/paddle/fluid/operators/dequeue_op.cc b/paddle/fluid/operators/dequeue_op.cc index 9e5b809e772b6..8fcc0fbfb47da 100644 --- a/paddle/fluid/operators/dequeue_op.cc +++ b/paddle/fluid/operators/dequeue_op.cc @@ -42,7 +42,7 @@ class DequeueOp : public framework::OperatorBase { auto* queue_holder_var = scope.FindVar(queue_name); PADDLE_ENFORCE_NOT_NULL( queue_holder_var, - platform::errors::NotFound( + phi::errors::NotFound( "No LoDTensorBlockingQueueHolder variable with name %s found.", queue_name)); auto* queue_holder = @@ -50,17 +50,17 @@ class DequeueOp : public framework::OperatorBase { auto& out_names = Outputs("Out"); PADDLE_ENFORCE_GT(out_names.size(), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The output for Op(dequeue) must be set.")); for (const auto& out_name : out_names) { auto out_var = scope.FindVar(out_name); - PADDLE_ENFORCE_NOT_NULL(out_var, - platform::errors::NotFound( - "No variable with name %s found", out_name)); + PADDLE_ENFORCE_NOT_NULL( + out_var, + phi::errors::NotFound("No variable with name %s found", out_name)); auto* out_tensor = out_var->GetMutable(); PADDLE_ENFORCE_NOT_NULL( out_tensor, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( 
"Variable with name %s has not been initialized.", out_name)); paddle::framework::LoDTensorArray lod_tensor_vec; @@ -68,7 +68,7 @@ class DequeueOp : public framework::OperatorBase { lod_tensor_vec = queue_holder->GetQueue()->Pop(&success); PADDLE_ENFORCE_EQ(lod_tensor_vec.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expected to pop only one element per Pop call for " "Op(dequeue), but poped %d element.", lod_tensor_vec.size())); diff --git a/paddle/fluid/operators/detection/anchor_generator_op.cc b/paddle/fluid/operators/detection/anchor_generator_op.cc index 8c3705ba3e760..3b826d5c249e1 100644 --- a/paddle/fluid/operators/detection/anchor_generator_op.cc +++ b/paddle/fluid/operators/detection/anchor_generator_op.cc @@ -25,24 +25,24 @@ class AnchorGeneratorOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("Input"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Input) of AnchorGeneratorOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Anchors"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Anchors) of AnchorGeneratorOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Variances"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Variances) of AnchorGeneratorOp should not be null.")); auto input_dims = ctx->GetInputDim("Input"); PADDLE_ENFORCE_EQ( input_dims.size(), 4, - platform::errors::InvalidArgument("The layout of input is NCHW.")); + phi::errors::InvalidArgument("The layout of input is NCHW.")); auto anchor_sizes = ctx->Attrs().Get>("anchor_sizes"); auto aspect_ratios = ctx->Attrs().Get>("aspect_ratios"); @@ -98,12 +98,12 @@ class AnchorGeneratorOpMaker : public framework::OpProtoAndCheckerMaker { .AddCustomChecker([](const std::vector& anchor_sizes) { PADDLE_ENFORCE_GT(anchor_sizes.size(), 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Size of 
anchor_sizes must be at least 1.")); for (size_t i = 0; i < anchor_sizes.size(); ++i) { PADDLE_ENFORCE_GT(anchor_sizes[i], 0.0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "anchor_sizes[%d] must be positive.", i)); } }); @@ -118,14 +118,14 @@ class AnchorGeneratorOpMaker : public framework::OpProtoAndCheckerMaker { "(vector) List of variances to be used " "in box regression deltas") .AddCustomChecker([](const std::vector& variances) { - PADDLE_ENFORCE_EQ(variances.size(), - 4UL, - platform::errors::InvalidArgument( - "Must provide 4 variance only.")); + PADDLE_ENFORCE_EQ( + variances.size(), + 4UL, + phi::errors::InvalidArgument("Must provide 4 variance only.")); for (size_t i = 0; i < variances.size(); ++i) { PADDLE_ENFORCE_GT(variances[i], 0.0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "variance[%d] must be greater than 0.", i)); } }); @@ -138,12 +138,12 @@ class AnchorGeneratorOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( stride.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Must provide 2 stride for width and height only.")); for (size_t i = 0; i < stride.size(); ++i) { PADDLE_ENFORCE_GT(stride[i], 0.0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "stride[%d] should be larger than 0.", i)); } }); diff --git a/paddle/fluid/operators/detection/bipartite_match_op.cc b/paddle/fluid/operators/detection/bipartite_match_op.cc index 53c082add0fa5..32942a03f1ab4 100644 --- a/paddle/fluid/operators/detection/bipartite_match_op.cc +++ b/paddle/fluid/operators/detection/bipartite_match_op.cc @@ -26,24 +26,24 @@ class BipartiteMatchOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("DistMat"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(DistMat) of BipartiteMatch should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("ColToRowMatchIndices"), true, - 
platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(ColToRowMatchIndices) of BipartiteMatch " "should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("ColToRowMatchDist"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(ColToRowMatchDist) of BipartiteMatch should not be null.")); auto dims = ctx->GetInputDim("DistMat"); - PADDLE_ENFORCE_EQ(dims.size(), - 2, - platform::errors::InvalidArgument( - "The rank of Input(DistMat) must be 2.")); + PADDLE_ENFORCE_EQ( + dims.size(), + 2, + phi::errors::InvalidArgument("The rank of Input(DistMat) must be 2.")); ctx->SetOutputDim("ColToRowMatchIndices", dims); ctx->SetOutputDim("ColToRowMatchDist", dims); @@ -75,7 +75,7 @@ class BipartiteMatchKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dist.dims().size(), 2, - platform::errors::InvalidArgument("The rank of dist must be 2.")); + phi::errors::InvalidArgument("The rank of dist must be 2.")); int64_t row = dist.dims()[0]; int64_t col = dist.dims()[1]; auto* dist_data = dist.data(); @@ -140,7 +140,7 @@ class BipartiteMatchKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( match_indices[max_idx], -1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The match_indices must be initialized to -1 at [%d].", max_idx)); match_indices[max_idx] = max_row_idx; @@ -183,7 +183,7 @@ class BipartiteMatchKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( match_indices[j], -1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The match_indices must be initialized to -1 at [%d].", j)); match_indices[j] = max_row_idx; match_dist[j] = max_dist; @@ -208,7 +208,7 @@ class BipartiteMatchKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dist_mat->lod().size(), 1UL, - platform::errors::InvalidArgument("Only support 1 level of LoD.")); + phi::errors::InvalidArgument("Only support 1 level of LoD.")); } match_indices->mutable_data({n, col}, context.GetPlace()); 
match_dist->mutable_data({n, col}, context.GetPlace()); diff --git a/paddle/fluid/operators/detection/box_clip_op.cc b/paddle/fluid/operators/detection/box_clip_op.cc index 5af100b8f6407..8df39b759cabb 100644 --- a/paddle/fluid/operators/detection/box_clip_op.cc +++ b/paddle/fluid/operators/detection/box_clip_op.cc @@ -24,12 +24,12 @@ class BoxClipOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInput("Input"), true, - platform::errors::NotFound("Input(Input) of BoxClipOp " - "is not found.")); + phi::errors::NotFound("Input(Input) of BoxClipOp " + "is not found.")); PADDLE_ENFORCE_EQ(ctx->HasInput("ImInfo"), true, - platform::errors::NotFound("Input(ImInfo) of BoxClipOp " - "is not found.")); + phi::errors::NotFound("Input(ImInfo) of BoxClipOp " + "is not found.")); auto input_box_dims = ctx->GetInputDim("Input"); auto im_info_dims = ctx->GetInputDim("ImInfo"); @@ -39,20 +39,20 @@ class BoxClipOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( input_box_dims[input_box_size - 1], 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last dimension of Input(Input) in BoxClipOp must be 4. " "But received last dimension = %d", input_box_dims[input_box_size - 1])); PADDLE_ENFORCE_EQ(im_info_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(Input) in BoxClipOp must be 2." " But received rank = %d", im_info_dims.size())); PADDLE_ENFORCE_EQ( im_info_dims[1], 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last dimension of Input(ImInfo) of BoxClipOp must be 3. 
" "But received last dimension = %d", im_info_dims[1])); diff --git a/paddle/fluid/operators/detection/box_clip_op.h b/paddle/fluid/operators/detection/box_clip_op.h index c07185dec167c..18faf1e2fbbcd 100644 --- a/paddle/fluid/operators/detection/box_clip_op.h +++ b/paddle/fluid/operators/detection/box_clip_op.h @@ -31,7 +31,7 @@ class BoxClipKernel : public framework::OpKernel { if (input_box->lod().size()) { PADDLE_ENFORCE_EQ(input_box->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Input) of BoxClip only supports 1 level " "of LoD. But received the " "level = %d", diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc index 552a6da3b3425..a7b9ad490b56c 100644 --- a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc +++ b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc @@ -23,33 +23,33 @@ class BoxDecoderAndAssignOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("PriorBox"), true, - platform::errors::NotFound("Input(PriorBox) of BoxDecoderAndAssignOp " - "is not found.")); + phi::errors::NotFound("Input(PriorBox) of BoxDecoderAndAssignOp " + "is not found.")); PADDLE_ENFORCE_EQ( ctx->HasInput("PriorBoxVar"), true, - platform::errors::NotFound("Input(PriorBoxVar) of BoxDecoderAndAssignOp" - " is not found.")); + phi::errors::NotFound("Input(PriorBoxVar) of BoxDecoderAndAssignOp" + " is not found.")); PADDLE_ENFORCE_EQ( ctx->HasInput("TargetBox"), true, - platform::errors::NotFound("Input(TargetBox) of BoxDecoderAndAssignOp " - "is not found.")); + phi::errors::NotFound("Input(TargetBox) of BoxDecoderAndAssignOp " + "is not found.")); PADDLE_ENFORCE_EQ( ctx->HasInput("BoxScore"), true, - platform::errors::NotFound("Input(BoxScore) of BoxDecoderAndAssignOp " - "is not found.")); + phi::errors::NotFound("Input(BoxScore) of BoxDecoderAndAssignOp " + "is not found.")); PADDLE_ENFORCE_EQ( 
ctx->HasOutput("DecodeBox"), true, - platform::errors::NotFound("Output(DecodeBox) of BoxDecoderAndAssignOp" - " is not found.")); + phi::errors::NotFound("Output(DecodeBox) of BoxDecoderAndAssignOp" + " is not found.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("OutputAssignBox"), true, - platform::errors::NotFound("Output(OutputAssignBox) of " - "BoxDecoderAndAssignOp is not found.")); + phi::errors::NotFound("Output(OutputAssignBox) of " + "BoxDecoderAndAssignOp is not found.")); auto prior_box_dims = ctx->GetInputDim("PriorBox"); auto prior_box_var_dims = ctx->GetInputDim("PriorBoxVar"); @@ -59,45 +59,45 @@ class BoxDecoderAndAssignOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( prior_box_dims.size(), 2, - platform::errors::InvalidArgument("The rank of Input of PriorBox must" - " be 2. But received rank = %d", - prior_box_dims.size())); + phi::errors::InvalidArgument("The rank of Input of PriorBox must" + " be 2. But received rank = %d", + prior_box_dims.size())); PADDLE_ENFORCE_EQ( prior_box_dims[1], 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of PriorBox is [N, 4], " "and the second dimension must be 4. But received dimension = %d", prior_box_dims[1])); PADDLE_ENFORCE_EQ( prior_box_var_dims.size(), 1, - platform::errors::InvalidArgument("The rank of Input of PriorBoxVar " - "must be 1. But received rank = %d", - prior_box_var_dims.size())); + phi::errors::InvalidArgument("The rank of Input of PriorBoxVar " + "must be 1. But received rank = %d", + prior_box_var_dims.size())); PADDLE_ENFORCE_EQ( prior_box_var_dims[0], 4, - platform::errors::InvalidArgument("The shape of PriorBoxVar is [4]. " - "But received dimension = %d", - prior_box_var_dims[0])); + phi::errors::InvalidArgument("The shape of PriorBoxVar is [4]. " + "But received dimension = %d", + prior_box_var_dims[0])); PADDLE_ENFORCE_EQ( target_box_dims.size(), 2, - platform::errors::InvalidArgument("The rank of Input of TargetBox must " - "be 2. 
But received rank = %d", - target_box_dims.size())); + phi::errors::InvalidArgument("The rank of Input of TargetBox must " + "be 2. But received rank = %d", + target_box_dims.size())); PADDLE_ENFORCE_EQ( box_score_dims.size(), 2, - platform::errors::InvalidArgument("The rank of Input of BoxScore must " - "be 2. But received rank = %d", - box_score_dims.size())); + phi::errors::InvalidArgument("The rank of Input of BoxScore must " + "be 2. But received rank = %d", + box_score_dims.size())); if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ( prior_box_dims[0], target_box_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of prior_box and " "target_box is the number of box and should be same. But " "received dimension of prior_box is %d, dimension of target_box " @@ -107,7 +107,7 @@ class BoxDecoderAndAssignOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( prior_box_dims[0], box_score_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of prior_box and " "box_score is the number of box and should be same. But received " "dimension of prior_box is %d, dimension of box_score is %d", @@ -116,7 +116,7 @@ class BoxDecoderAndAssignOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( target_box_dims[1], box_score_dims[1] * prior_box_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of target_box is " "[N, classnum * 4], The shape of box_score is [N, classnum], " "The shape of prior_box is [N, 4]. 
But received second dimension " diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc index db2f9726db56a..fd5161932ff22 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc @@ -24,18 +24,18 @@ class CollectFpnProposalsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( context->HasInputs("MultiLevelRois"), true, - platform::errors::NotFound("Inputs(MultiLevelRois) of " - "CollectFpnProposalsOp is not found")); + phi::errors::NotFound("Inputs(MultiLevelRois) of " + "CollectFpnProposalsOp is not found")); PADDLE_ENFORCE_EQ( context->HasInputs("MultiLevelScores"), true, - platform::errors::NotFound("Inputs(MultiLevelScores) of " - "CollectFpnProposalsOp is not found")); + phi::errors::NotFound("Inputs(MultiLevelScores) of " + "CollectFpnProposalsOp is not found")); PADDLE_ENFORCE_EQ( context->HasOutput("FpnRois"), true, - platform::errors::NotFound("Outputs(MultiFpnRois) of " - "CollectFpnProposalsOp is not found")); + phi::errors::NotFound("Outputs(MultiFpnRois) of " + "CollectFpnProposalsOp is not found")); auto roi_dims = context->GetInputsDim("MultiLevelRois"); auto score_dims = context->GetInputsDim("MultiLevelScores"); auto post_nms_topN = context->Attrs().Get("post_nms_topN"); @@ -44,7 +44,7 @@ class CollectFpnProposalsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( roi_dim[1], 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Second dimension of Input" "(MultiLevelRois) must be 4. But received dimension = %d", roi_dim[1])); @@ -53,7 +53,7 @@ class CollectFpnProposalsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( score_dim[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Second dimension of Input" "(MultiLevelScores) must be 1. 
But received dimension = %d", score_dim[1])); @@ -79,7 +79,7 @@ class CollectFpnProposalsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( roi_lod, score_lod, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(MultiLevelRois) and " "Inputs(MultiLevelScores) should have same lod.")); } diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.h b/paddle/fluid/operators/detection/collect_fpn_proposals_op.h index 462b4a4584ece..81356170598bf 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.h +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.h @@ -76,7 +76,7 @@ class CollectFpnProposalsOpKernel : public framework::OpKernel { PADDLE_ENFORCE_GE(post_nms_topN, 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The parameter post_nms_topN must be " "a positive integer. But received post_nms_topN = %d", post_nms_topN)); @@ -85,7 +85,7 @@ class CollectFpnProposalsOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( multi_layer_rois.size(), multi_layer_scores.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of RoIs and Scores should" " be the same. But received number of RoIs is %d, number of Scores " "is %d", diff --git a/paddle/fluid/operators/detection/density_prior_box_op.cc b/paddle/fluid/operators/detection/density_prior_box_op.cc index e79de60b7690d..4a533615aab15 100644 --- a/paddle/fluid/operators/detection/density_prior_box_op.cc +++ b/paddle/fluid/operators/detection/density_prior_box_op.cc @@ -29,7 +29,7 @@ class DensityPriorBoxOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( image_dims.size(), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Image) of Op(density_prior_box) should be a 4-D Tensor " "and data format is NCHW. 
But received Image's dimensions = %d, " "shape = [%s].", @@ -38,7 +38,7 @@ class DensityPriorBoxOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( input_dims.size(), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Input) of Op(density_prior_box) should be a 4-D Tensor " "and data format is NCHW. But received Input's dimensions = %d, " "shape = [%s].", @@ -49,7 +49,7 @@ class DensityPriorBoxOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LT( input_dims[2], image_dims[2], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor Input's height" "of DensityPriorBoxOp should be smaller than input tensor Image's" "height. But received Input's height = %d, Image's height = %d", @@ -59,7 +59,7 @@ class DensityPriorBoxOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LT( input_dims[3], image_dims[3], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor Input's width" "of DensityPriorBoxOp should be smaller than input tensor Image's" "width. But received Input's width = %d, Image's width = %d", @@ -76,7 +76,7 @@ class DensityPriorBoxOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( fixed_sizes.size(), densities.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The length of fixed_sizes and densities must be equal. " "But received: fixed_sizes's length is %d, densities's length " "is %d", @@ -139,14 +139,14 @@ class DensityPriorBoxOpMaker : public framework::OpProtoAndCheckerMaker { .AddCustomChecker([](const std::vector& variances) { PADDLE_ENFORCE_EQ(variances.size(), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The length of variance must " "be 4. 
But received: variances' length is %d.", variances.size())); for (size_t i = 0; i < variances.size(); ++i) { PADDLE_ENFORCE_GT(variances[i], 0.0, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "variance[%d] must be greater " "than 0. But received: variance[%d] = %f", i, @@ -165,24 +165,24 @@ class DensityPriorBoxOpMaker : public framework::OpProtoAndCheckerMaker { "Density prior boxes step across width, 0.0 for auto calculation.") .SetDefault(0.0) .AddCustomChecker([](const float& step_w) { - PADDLE_ENFORCE_GE(step_w, - 0.0, - platform::errors::InvalidArgument( - "step_w should be larger " - "than 0. But received: step_w = %f.", - step_w)); + PADDLE_ENFORCE_GE( + step_w, + 0.0, + phi::errors::InvalidArgument("step_w should be larger " + "than 0. But received: step_w = %f.", + step_w)); }); AddAttr( "step_h", "Density prior boxes step across height, 0.0 for auto calculation.") .SetDefault(0.0) .AddCustomChecker([](const float& step_h) { - PADDLE_ENFORCE_GE(step_h, - 0.0, - platform::errors::InvalidArgument( - "step_h should be larger " - "than 0. But received: step_h = %f.", - step_h)); + PADDLE_ENFORCE_GE( + step_h, + 0.0, + phi::errors::InvalidArgument("step_h should be larger " + "than 0. But received: step_h = %f.", + step_h)); }); AddAttr("offset", @@ -198,7 +198,7 @@ class DensityPriorBoxOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_GT( fixed_sizes[i], 0.0, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "fixed_sizes[%d] should be " "larger than 0. But received: fixed_sizes[%d] = %f", i, @@ -216,7 +216,7 @@ class DensityPriorBoxOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_GT( fixed_ratios[i], 0.0, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "fixed_ratios[%d] should be " "larger than 0. 
But received: fixed_ratios[%d] = %f", i, @@ -234,7 +234,7 @@ class DensityPriorBoxOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_GT( densities[i], 0, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "densities[%d] should be " "larger than 0. But received: densities[%d] = %f.", i, diff --git a/paddle/fluid/operators/detection/generate_mask_labels_op.cc b/paddle/fluid/operators/detection/generate_mask_labels_op.cc index bf56a6f857e0d..5ee843d72387b 100644 --- a/paddle/fluid/operators/detection/generate_mask_labels_op.cc +++ b/paddle/fluid/operators/detection/generate_mask_labels_op.cc @@ -44,57 +44,57 @@ class GenerateMaskLabelsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("ImInfo"), true, - platform::errors::InvalidArgument("Input(ImInfo) shouldn't be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("GtClasses"), - true, - platform::errors::InvalidArgument( - "Input(GtClasses) shouldn't be null.")); + phi::errors::InvalidArgument("Input(ImInfo) shouldn't be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("GtClasses"), + true, + phi::errors::InvalidArgument("Input(GtClasses) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("IsCrowd"), true, - platform::errors::InvalidArgument("Input(IsCrowd) shouldn't be null.")); + phi::errors::InvalidArgument("Input(IsCrowd) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("GtSegms"), true, - platform::errors::InvalidArgument("Input(GtSegms) shouldn't be null.")); + phi::errors::InvalidArgument("Input(GtSegms) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("Rois"), true, - platform::errors::InvalidArgument("Input(Rois) shouldn't be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("LabelsInt32"), - true, - platform::errors::InvalidArgument( - "Input(LabelsInt32) shouldn't be null.")); + phi::errors::InvalidArgument("Input(Rois) shouldn't be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("LabelsInt32"), + true, + phi::errors::InvalidArgument("Input(LabelsInt32) 
shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("MaskRois"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(MaskRois) of GenerateMaskLabelsOp should not be null")); PADDLE_ENFORCE_EQ(ctx->HasOutput("RoiHasMaskInt32"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(RoiHasMaskInt32) of GenerateMaskLabelsOp " "should not be null")); PADDLE_ENFORCE_EQ( ctx->HasOutput("MaskInt32"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(MaskInt32) of GenerateMaskLabelsOp should not be null")); auto im_info_dims = ctx->GetInputDim("ImInfo"); auto gt_segms_dims = ctx->GetInputDim("GtSegms"); - PADDLE_ENFORCE_EQ(im_info_dims.size(), - 2, - platform::errors::InvalidArgument( - "The rank of Input(ImInfo) must be 2.")); - PADDLE_ENFORCE_EQ(gt_segms_dims.size(), - 2, - platform::errors::InvalidArgument( - "The rank of Input(GtSegms) must be 2.")); + PADDLE_ENFORCE_EQ( + im_info_dims.size(), + 2, + phi::errors::InvalidArgument("The rank of Input(ImInfo) must be 2.")); + PADDLE_ENFORCE_EQ( + gt_segms_dims.size(), + 2, + phi::errors::InvalidArgument("The rank of Input(GtSegms) must be 2.")); PADDLE_ENFORCE_EQ(gt_segms_dims[1], 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dim of Input(GtSegms) must be 2.")); int num_classes = ctx->Attrs().Get("num_classes"); int resolution = ctx->Attrs().Get("resolution"); @@ -170,7 +170,7 @@ std::vector SampleMaskForOneImage( const int* label_int32_data = label_int32.data(); PADDLE_ENFORCE_EQ(roi_size, label_int32.dims()[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dim of label [%d] is the different from " "roi_size [%d], they should be same.", label_int32.dims()[0], @@ -197,7 +197,7 @@ std::vector SampleMaskForOneImage( int e = static_cast(lod2[s_idx + j + 1]); PADDLE_ENFORCE_NE(s, e, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The start 
point and the end point in the poly " "segment [%d] should not be same, but received " "the start point [%d] and the end point [%d].", @@ -349,34 +349,34 @@ class GenerateMaskLabelsKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( gt_classes->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GenerateMaskLabelsOp gt_classes needs 1 level of LoD")); PADDLE_ENFORCE_EQ( is_crowd->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GenerateMaskLabelsOp is_crowd needs 1 level of LoD")); PADDLE_ENFORCE_EQ(rois->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GenerateMaskLabelsOp rois needs 1 level of LoD")); PADDLE_ENFORCE_EQ( label_int32->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GenerateMaskLabelsOp label_int32 needs 1 level of LoD")); PADDLE_ENFORCE_EQ( gt_segms->lod().size(), 3UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GenerateMaskLabelsOp gt_segms needs 3 level of LoD")); int64_t n = static_cast(gt_classes->lod().back().size() - 1); PADDLE_ENFORCE_EQ( gt_segms->lod()[0].size() - 1, n, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Batchsize of Input(gt_segms) and Input(gt_classes) should be " "same, but received gt_segms[%d], gt_classes[%d].", gt_segms->lod()[0].size() - 1, diff --git a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc index a0fb3ec799eea..ad37aa2ae682f 100644 --- a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc +++ b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc @@ -68,49 +68,49 @@ class GenerateProposalLabelsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("RpnRois"), true, - platform::errors::NotFound("Input(RpnRois) shouldn't be null.")); + phi::errors::NotFound("Input(RpnRois) 
shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("GtClasses"), true, - platform::errors::NotFound("Input(GtClasses) shouldn't be null.")); + phi::errors::NotFound("Input(GtClasses) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("IsCrowd"), true, - platform::errors::NotFound("Input(IsCrowd) shouldn't be null.")); + phi::errors::NotFound("Input(IsCrowd) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("GtBoxes"), true, - platform::errors::NotFound("Input(GtBoxes) shouldn't be null.")); + phi::errors::NotFound("Input(GtBoxes) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("ImInfo"), true, - platform::errors::NotFound("Input(ImInfo) shouldn't be null.")); + phi::errors::NotFound("Input(ImInfo) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Rois"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(Rois) of GenerateProposalLabelsOp should not be null")); PADDLE_ENFORCE_EQ(ctx->HasOutput("LabelsInt32"), true, - platform::errors::NotFound("Output(LabelsInt32) of " - "GenerateProposalLabelsOp " - "should not be null")); + phi::errors::NotFound("Output(LabelsInt32) of " + "GenerateProposalLabelsOp " + "should not be null")); PADDLE_ENFORCE_EQ(ctx->HasOutput("BboxTargets"), true, - platform::errors::NotFound("Output(BboxTargets) of " - "GenerateProposalLabelsOp " - "should not be null")); + phi::errors::NotFound("Output(BboxTargets) of " + "GenerateProposalLabelsOp " + "should not be null")); PADDLE_ENFORCE_EQ( ctx->HasOutput("BboxInsideWeights"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(BboxInsideWeights) of GenerateProposalLabelsOp " "should not be null")); PADDLE_ENFORCE_EQ( ctx->HasOutput("BboxOutsideWeights"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(BboxOutsideWeights) of GenerateProposalLabelsOp " "should not be null")); @@ -120,21 +120,21 @@ class GenerateProposalLabelsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(rpn_rois_dims.size(), 
2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions size of Input(RpnRois) must be 2. " "But received dimensions size=[%d], dimensions=[%s].", rpn_rois_dims.size(), rpn_rois_dims)); PADDLE_ENFORCE_EQ(gt_boxes_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions size of Input(GtBoxes) must be 2. " "But received dimensions size=[%d], dimensions=[%s].", gt_boxes_dims.size(), gt_boxes_dims)); PADDLE_ENFORCE_EQ(im_info_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions size of Input(ImInfo) must be 2. But " "received dimensions size=[%d], dimensions=[%s].", im_info_dims.size(), @@ -146,7 +146,7 @@ class GenerateProposalLabelsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("MaxOverlap"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(MaxOverlap) of GenerateProposalLabelsOp " "should not be null when is_cascade_rcnn is True.")); } @@ -544,7 +544,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( rpn_rois->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GenerateProposalLabelsOp rpn_rois needs 1 level of LoD. But " "received level of LoD is [%d], LoD is [%s].", rpn_rois->lod().size(), @@ -552,7 +552,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( gt_classes->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GenerateProposalLabelsOp gt_classes needs 1 level of LoD. But " "received level of LoD is [%d], LoD is [%s].", gt_classes->lod().size(), @@ -560,7 +560,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( is_crowd->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GenerateProposalLabelsOp is_crowd needs 1 level of LoD. 
But " "received level of LoD is [%d], LoD is [%s].", is_crowd->lod().size(), @@ -568,7 +568,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( gt_boxes->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GenerateProposalLabelsOp gt_boxes needs 1 level of LoD. But " "received level of LoD is [%d], LoD is [%s].", gt_boxes->lod().size(), diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cc b/paddle/fluid/operators/detection/generate_proposals_op.cc index 710db1668e237..5e961674cd774 100644 --- a/paddle/fluid/operators/detection/generate_proposals_op.cc +++ b/paddle/fluid/operators/detection/generate_proposals_op.cc @@ -35,23 +35,23 @@ class GenerateProposalsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("Scores"), true, - platform::errors::NotFound("Input(Scores) shouldn't be null.")); + phi::errors::NotFound("Input(Scores) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("BboxDeltas"), true, - platform::errors::NotFound("Input(BboxDeltas) shouldn't be null.")); + phi::errors::NotFound("Input(BboxDeltas) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("ImInfo"), true, - platform::errors::NotFound("Input(ImInfo) shouldn't be null.")); + phi::errors::NotFound("Input(ImInfo) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("Anchors"), true, - platform::errors::NotFound("Input(Anchors) shouldn't be null.")); + phi::errors::NotFound("Input(Anchors) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("Variances"), true, - platform::errors::NotFound("Input(Variances) shouldn't be null.")); + phi::errors::NotFound("Input(Variances) shouldn't be null.")); ctx->SetOutputDim("RpnRois", {-1, 4}); ctx->SetOutputDim("RpnRoiProbs", {-1, 1}); diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cu b/paddle/fluid/operators/detection/generate_proposals_op.cu index d24cbcb81d019..1bb494f7fa508 100644 --- 
a/paddle/fluid/operators/detection/generate_proposals_op.cu +++ b/paddle/fluid/operators/detection/generate_proposals_op.cu @@ -151,7 +151,7 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel { float eta = context.Attr("eta"); PADDLE_ENFORCE_GE(eta, 1., - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Not support adaptive NMS. The attribute 'eta' " "should not less than 1. But received eta=[%d]", eta)); diff --git a/paddle/fluid/operators/detection/locality_aware_nms_op.cc b/paddle/fluid/operators/detection/locality_aware_nms_op.cc index 8d3ed1a033acf..cd3f67e3cc007 100644 --- a/paddle/fluid/operators/detection/locality_aware_nms_op.cc +++ b/paddle/fluid/operators/detection/locality_aware_nms_op.cc @@ -39,20 +39,20 @@ class LocalityAwareNMSOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( score_size, 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(Scores) must be 3. But received %d.", score_size)); PADDLE_ENFORCE_EQ( box_dims.size(), 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(BBoxes) must be 3. But received %d.", box_dims.size())); PADDLE_ENFORCE_EQ( box_dims[2] == 4 || box_dims[2] == 8 || box_dims[2] == 16 || box_dims[2] == 24 || box_dims[2] == 32, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last dimension of Input(BBoxes) must be 4 or 8, " "represents the layout of coordinate " "[xmin, ymin, xmax, ymax] or " @@ -65,7 +65,7 @@ class LocalityAwareNMSOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( box_dims[1], score_dims[2], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 2nd dimension of Input(BBoxes) must be equal to " "last dimension of Input(Scores), which represents the " "predicted bboxes. 
But received the 2nd dimension of " diff --git a/paddle/fluid/operators/detection/mine_hard_examples_op.cc b/paddle/fluid/operators/detection/mine_hard_examples_op.cc index 0ce9979ff2a3d..382705d2879e1 100644 --- a/paddle/fluid/operators/detection/mine_hard_examples_op.cc +++ b/paddle/fluid/operators/detection/mine_hard_examples_op.cc @@ -193,18 +193,18 @@ class MineHardExamplesOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(cls_loss_dims.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of ClsLoss is [N, Np]. But received %d.", cls_loss_dims.size())); PADDLE_ENFORCE_EQ( idx_dims.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of MatchIndices is [N, Np]. But received %d.", idx_dims.size())); PADDLE_ENFORCE_EQ(dis_dims.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of MatchDist is [N, Np]. But received %d.", dis_dims.size())); @@ -212,13 +212,13 @@ class MineHardExamplesOp : public framework::OperatorWithKernel { auto loc_loss_dims = ctx->GetInputDim("LocLoss"); PADDLE_ENFORCE_EQ(loc_loss_dims.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of LocLoss is [N, Np]. But received %d.", loc_loss_dims.size())); if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ(cls_loss_dims[0], loc_loss_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Batch size of ClsLoss and LocLoss must be the " "same. But received batch size of ClsLoss was " "%d, batch size of LocLoss was %d.", @@ -226,7 +226,7 @@ class MineHardExamplesOp : public framework::OperatorWithKernel { loc_loss_dims[0])); PADDLE_ENFORCE_EQ(cls_loss_dims[1], loc_loss_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Prior box number of ClsLoss and LocLoss must be " "the same. 
But received box number of ClsLoss " "was %d, box number of LocLoss was %d.", @@ -238,7 +238,7 @@ class MineHardExamplesOp : public framework::OperatorWithKernel { if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ(cls_loss_dims[0], idx_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Batch size of ClsLoss and MatchIndices must be " "the same. But received batch size of ClsLoss was " "%d, batch size of MatchIndices was %d.", @@ -247,7 +247,7 @@ class MineHardExamplesOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( cls_loss_dims[1], idx_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Prior box number of ClsLoss and " "MatchIndices must be the same. But received box number of " "ClsLoss was %d, box number of MatchIndices was %d.", @@ -256,7 +256,7 @@ class MineHardExamplesOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(cls_loss_dims[0], dis_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Batch size of ClsLoss and MatchDist must be the " "same. But received batch size of ClsLoss was %d, " "batch size of MatchDist was %d.", @@ -264,7 +264,7 @@ class MineHardExamplesOp : public framework::OperatorWithKernel { dis_dims[0])); PADDLE_ENFORCE_EQ(cls_loss_dims[1], idx_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Prior box number of ClsLoss and MatchDist must be " "the same. 
But received box number of ClsLoss was " "%d, box number of MatchDist was %d.", @@ -277,7 +277,7 @@ class MineHardExamplesOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_NE(mining_type, MiningType::kNone, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "mining_type must be hard_example or max_negative")); if (mining_type == MiningType::kMaxNegative) { @@ -285,30 +285,30 @@ class MineHardExamplesOp : public framework::OperatorWithKernel { auto neg_dist_threshold = ctx->Attrs().Get("neg_dist_threshold"); PADDLE_ENFORCE_GT(neg_pos_ratio, 0.0f, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "neg_pos_ratio must greater than zero in " "max_negative mode. But received %f.", neg_pos_ratio)); PADDLE_ENFORCE_LT(neg_dist_threshold, 1.0f, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "neg_dist_threshold must less than one in " "max_negative mode. But received %f.", neg_dist_threshold)); PADDLE_ENFORCE_GT(neg_dist_threshold, 0.0f, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "neg_dist_threshold must greater " "than zero in max_negative mode. But received %f.", neg_dist_threshold)); } else if (mining_type == MiningType::kHardExample) { auto sample_size = ctx->Attrs().Get("sample_size"); - PADDLE_ENFORCE_GT(sample_size, - 0, - platform::errors::InvalidArgument( - "sample_size must greater than zero in " - "hard_example mode. But received %d.", - sample_size)); + PADDLE_ENFORCE_GT( + sample_size, + 0, + phi::errors::InvalidArgument("sample_size must greater than zero in " + "hard_example mode. 
But received %d.", + sample_size)); } ctx->SetOutputDim("UpdatedMatchIndices", idx_dims); diff --git a/paddle/fluid/operators/detection/multiclass_nms_op.cc b/paddle/fluid/operators/detection/multiclass_nms_op.cc index 9cd9e76772424..73ec6caa61c27 100644 --- a/paddle/fluid/operators/detection/multiclass_nms_op.cc +++ b/paddle/fluid/operators/detection/multiclass_nms_op.cc @@ -47,22 +47,22 @@ class MultiClassNMSOp : public framework::OperatorWithKernel { if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ(score_size == 2 || score_size == 3, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(Scores) must be 2 or 3" ". But received rank = %d", score_size)); - PADDLE_ENFORCE_EQ(box_dims.size(), - 3, - platform::errors::InvalidArgument( - "The rank of Input(BBoxes) must be 3" - ". But received rank = %d", - box_dims.size())); + PADDLE_ENFORCE_EQ( + box_dims.size(), + 3, + phi::errors::InvalidArgument("The rank of Input(BBoxes) must be 3" + ". But received rank = %d", + box_dims.size())); if (score_size == 3) { PADDLE_ENFORCE_EQ(box_dims[2] == 4 || box_dims[2] == 8 || box_dims[2] == 16 || box_dims[2] == 24 || box_dims[2] == 32, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last dimension of Input" "(BBoxes) must be 4 or 8, " "represents the layout of coordinate " @@ -74,7 +74,7 @@ class MultiClassNMSOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( box_dims[1], score_dims[2], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 2nd dimension of Input(BBoxes) must be equal to " "last dimension of Input(Scores), which represents the " "predicted bboxes." @@ -84,14 +84,14 @@ class MultiClassNMSOp : public framework::OperatorWithKernel { } else { PADDLE_ENFORCE_EQ(box_dims[2], 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last dimension of Input" "(BBoxes) must be 4. 
But received dimension = %d", box_dims[2])); PADDLE_ENFORCE_EQ( box_dims[1], score_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 2nd dimension of Input" "(BBoxes) must be equal to the 2nd dimension of Input(Scores). " "But received box dimension = %d, score dimension = %d", diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cc b/paddle/fluid/operators/detection/polygon_box_transform_op.cc index 0059aedcdc86c..35518b224e5ad 100644 --- a/paddle/fluid/operators/detection/polygon_box_transform_op.cc +++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cc @@ -21,10 +21,9 @@ template class PolygonBoxTransformCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ( - platform::is_cpu_place(ctx.GetPlace()), - true, - platform::errors::InvalidArgument("It must use CUDAPlace.")); + PADDLE_ENFORCE_EQ(platform::is_cpu_place(ctx.GetPlace()), + true, + phi::errors::InvalidArgument("It must use CUDAPlace.")); auto* in = ctx.Input("Input"); auto in_dims = common::vectorize(in->dims()); const T* in_data = in->data(); @@ -66,12 +65,12 @@ class PolygonBoxTransformOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( in_dim.size(), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "input's rank must be 4. But received: Input rank is [%d]", in_dim.size())); PADDLE_ENFORCE_EQ(in_dim[1] % 2, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "input's second dimension must be even. 
But " "received: Input 2nd dimension is [%d]", in_dim[1])); diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cu b/paddle/fluid/operators/detection/polygon_box_transform_op.cu index 4f182464f77b5..b23a8d4e41bc5 100644 --- a/paddle/fluid/operators/detection/polygon_box_transform_op.cu +++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cu @@ -45,7 +45,7 @@ class PolygonBoxTransformOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The polygon_box_transform operator needs to be executed on GPU.")); auto* in = ctx.Input("Input"); auto in_dims = in->dims(); diff --git a/paddle/fluid/operators/detection/retinanet_detection_output_op.cc b/paddle/fluid/operators/detection/retinanet_detection_output_op.cc index b97cfe81a5a17..f43c7ec644a76 100644 --- a/paddle/fluid/operators/detection/retinanet_detection_output_op.cc +++ b/paddle/fluid/operators/detection/retinanet_detection_output_op.cc @@ -26,28 +26,28 @@ class RetinanetDetectionOutputOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( ctx->Inputs("BBoxes").size(), 1UL, - platform::errors::InvalidArgument("The length of Input(BBoxes) should " - "be greater than 0, but received " - "BBoxes length is:%d.", - ctx->Inputs("BBoxes").size())); + phi::errors::InvalidArgument("The length of Input(BBoxes) should " + "be greater than 0, but received " + "BBoxes length is:%d.", + ctx->Inputs("BBoxes").size())); PADDLE_ENFORCE_GE( ctx->Inputs("Scores").size(), 1UL, - platform::errors::InvalidArgument("The length of Input(Scores) should " - "be greater than 0, but received " - "Scores length is:%d.", - ctx->Inputs("Scores").size())); + phi::errors::InvalidArgument("The length of Input(Scores) should " + "be greater than 0, but received " + "Scores length is:%d.", + ctx->Inputs("Scores").size())); PADDLE_ENFORCE_GE( ctx->Inputs("Anchors").size(), 1UL, - 
platform::errors::InvalidArgument("The length of Input(Anchors) should " - "be greater than 0, but received " - "Anchors length is:%d.", - ctx->Inputs("Anchors").size())); + phi::errors::InvalidArgument("The length of Input(Anchors) should " + "be greater than 0, but received " + "Anchors length is:%d.", + ctx->Inputs("Anchors").size())); PADDLE_ENFORCE_EQ( ctx->Inputs("BBoxes").size(), ctx->Inputs("Scores").size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(BBoxes) and Input(Scores) should have the same length, but " "received BBoxes length is:%d, Scores length is:%d.", ctx->Inputs("BBoxes").size(), @@ -55,7 +55,7 @@ class RetinanetDetectionOutputOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->Inputs("BBoxes").size(), ctx->Inputs("Anchors").size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(BBoxes) and Input(Anchors) should have the same length, but " "received BBoxes length is:%d, Anchors length is:%d.", ctx->Inputs("BBoxes").size(), @@ -73,25 +73,25 @@ class RetinanetDetectionOutputOp : public framework::OperatorWithKernel { auto im_info_dims = ctx->GetInputDim("ImInfo"); const size_t b_n = bboxes_dims.size(); - PADDLE_ENFORCE_GT(b_n, - 0, - platform::errors::InvalidArgument( - "The number of Variables in Input(BBoxes) " - "should be greater than 0, " - "but received number is:%d.", - b_n)); + PADDLE_ENFORCE_GT( + b_n, + 0, + phi::errors::InvalidArgument("The number of Variables in Input(BBoxes) " + "should be greater than 0, " + "but received number is:%d.", + b_n)); const size_t s_n = scores_dims.size(); - PADDLE_ENFORCE_GT(s_n, - 0, - platform::errors::InvalidArgument( - "The number of Variables in Input(Scores) " - "should be greater than 0, " - "but received number is:%d.", - s_n)); + PADDLE_ENFORCE_GT( + s_n, + 0, + phi::errors::InvalidArgument("The number of Variables in Input(Scores) " + "should be greater than 0, " + "but received number is:%d.", + s_n)); const size_t 
a_n = anchors_dims.size(); PADDLE_ENFORCE_GT(a_n, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of Variables in Input(Anchors) " "should be greater than 0, " "but received number is:%d.", @@ -103,35 +103,35 @@ class RetinanetDetectionOutputOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( score_dims.size(), 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of each Variable in Input(Scores) must be 3, " "but received rank is:%d.", score_dims.size())); PADDLE_ENFORCE_EQ( bbox_dims.size(), 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of each Variable in Input(BBoxes) must be 3, " "but received rank is:%d.", bbox_dims.size())); PADDLE_ENFORCE_EQ( anchor_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of each Variable in Input(Anchors) must be 2, " "but received rank is:%d.", anchor_dims.size())); PADDLE_ENFORCE_EQ( bbox_dims[2], 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last dimension of each Variable in Input(BBoxes) must be 4 " "representing the layout of coordinate [xmin, ymin, xmax, ymax], " "but received dimension is:%d.", bbox_dims[2])); PADDLE_ENFORCE_EQ(bbox_dims[1], score_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 2nd dimension of Variables in Input(BBoxes) " "and Input(Scores) " "must be same, which represents the number of the " @@ -143,7 +143,7 @@ class RetinanetDetectionOutputOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( anchor_dims[0], bbox_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 1st dimension of each Variables in Input(Anchors) must be " "equal " "to the 2nd dimension of corresponding Variables in " @@ -155,7 +155,7 @@ class RetinanetDetectionOutputOp : public framework::OperatorWithKernel { bbox_dims[1])); PADDLE_ENFORCE_EQ(im_info_dims.size(), 2, - 
platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(ImInfo) must be 2, but " "received ImInfo rank is:%d.", im_info_dims.size())); diff --git a/paddle/fluid/operators/detection/rpn_target_assign_op.cc b/paddle/fluid/operators/detection/rpn_target_assign_op.cc index 81e8d0d3edf7e..d3c315b7bdfc5 100644 --- a/paddle/fluid/operators/detection/rpn_target_assign_op.cc +++ b/paddle/fluid/operators/detection/rpn_target_assign_op.cc @@ -66,21 +66,21 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel { auto im_info_dims = ctx->GetInputDim("ImInfo"); PADDLE_ENFORCE_EQ(anchor_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions size of Input(Anchor) must be 2. But " "received dimensions size=[%d], dimensions=[%s].", anchor_dims.size(), anchor_dims)); PADDLE_ENFORCE_EQ(gt_boxes_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions size of Input(GtBoxes) must be 2. " "But received dimensions size=[%d], dimensions=[%s].", gt_boxes_dims.size(), gt_boxes_dims)); PADDLE_ENFORCE_EQ(im_info_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions size of Input(ImInfo) must be 2. But " "received dimensions size=[%d], dimensions=[%s].", im_info_dims.size(), @@ -411,14 +411,14 @@ class RpnTargetAssignKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(gt_boxes->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "RpnTargetAssignOp gt_boxes needs 1 level of LoD. " "But received level of LoD is [%d], LoD is [%s].", gt_boxes->lod().size(), gt_boxes->lod())); PADDLE_ENFORCE_EQ(is_crowd->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "RpnTargetAssignOp is_crowd needs 1 level of LoD. 
" "But received level of LoD is [%d], LoD is [%s].", is_crowd->lod().size(), @@ -567,7 +567,7 @@ class RpnTargetAssignKernel : public framework::OpKernel { PADDLE_ENFORCE_LE( total_loc_num, max_num, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of sampled bboxes should not be greater than the " "number of all anchor boxes(%d), but the number of sampled " "bboxes is :%d.", @@ -576,7 +576,7 @@ class RpnTargetAssignKernel : public framework::OpKernel { PADDLE_ENFORCE_LE( total_score_num, max_num, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of sampled scores should not be greater than the " "number of all anchor boxes(%d), but the number of sampled " "scores is :%d.", @@ -815,7 +815,7 @@ class RetinanetTargetAssignOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( anchor_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(Anchor) should be 2, but received Anchor " "rank is :%d, Anchor shape is:[%s].", anchor_dims.size(), @@ -823,7 +823,7 @@ class RetinanetTargetAssignOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( gt_boxes_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(GtBoxes) should be 2, but received GtBoxes " "rank is :%d, GtBoxes shape is:[%s].", gt_boxes_dims.size(), @@ -831,7 +831,7 @@ class RetinanetTargetAssignOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( gt_labels_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(GtLabels) should be 2, but received GtLabels " "rank is :%d, GtLabels shape is:[%s].", gt_labels_dims.size(), @@ -839,7 +839,7 @@ class RetinanetTargetAssignOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( im_info_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(ImInfo) should be 2, but received ImInfo " 
"rank is :%d, ImInfo shape is:[%s].", im_info_dims.size(), @@ -1019,21 +1019,21 @@ class RetinanetTargetAssignKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( gt_boxes->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The LoD level of Input(GtBoxes) should be 1, but received GtBoxes " "LoD level is :%d.", gt_boxes->lod().size())); PADDLE_ENFORCE_EQ( gt_labels->lod().size(), 1UL, - platform::errors::InvalidArgument("The LoD level of Input(GtLabels) " - "should be 1, but received GtLabels " - "LoD level is :%d.", - gt_labels->lod().size())); + phi::errors::InvalidArgument("The LoD level of Input(GtLabels) " + "should be 1, but received GtLabels " + "LoD level is :%d.", + gt_labels->lod().size())); PADDLE_ENFORCE_EQ( is_crowd->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The LoD level of Input(IsCrowd) should be 1, but received IsCrowd " "LoD level is :%d.", is_crowd->lod().size())); @@ -1190,7 +1190,7 @@ class RetinanetTargetAssignKernel : public framework::OpKernel { PADDLE_ENFORCE_LE( total_loc_num, max_num, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of sampled bboxes should not be greater than the " "number of all anchor boxes(%d), but the number of sampled " "bboxes is :%d.", @@ -1199,7 +1199,7 @@ class RetinanetTargetAssignKernel : public framework::OpKernel { PADDLE_ENFORCE_LE( total_score_num, max_num, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of sampled scores should not be greater than the " "number of all anchor boxes(%d), but the number of sampled " "scores is :%d.", @@ -1208,7 +1208,7 @@ class RetinanetTargetAssignKernel : public framework::OpKernel { PADDLE_ENFORCE_LE( total_fg_num, batch_num, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of foreground numbers should not be greater than the " "batch size(%d), but the number of foreground numbers is 
:%d.", batch_num, diff --git a/paddle/fluid/operators/detection_map_op.cc b/paddle/fluid/operators/detection_map_op.cc index a0879337f5ae7..cee37d49eb69b 100644 --- a/paddle/fluid/operators/detection_map_op.cc +++ b/paddle/fluid/operators/detection_map_op.cc @@ -45,28 +45,28 @@ class DetectionMAPOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( det_dims.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(DetectRes) ndim must be 2, the shape is [N, 6]," "but received the ndim is %d", det_dims.size())); PADDLE_ENFORCE_EQ( det_dims[1], 6UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape is of Input(DetectRes) [N, 6], but received" " shape is [N, %d]", det_dims[1])); auto label_dims = ctx->GetInputDim("Label"); PADDLE_ENFORCE_EQ(label_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ndim of Input(Label) must be 2, but received %d", label_dims.size())); if (ctx->IsRuntime() || label_dims[1] > 0) { PADDLE_ENFORCE_EQ( (label_dims[1] == 6 || label_dims[1] == 5), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Input(Label) is [N, 6] or [N, 5], but received " "[N, %d]", label_dims[1])); @@ -75,12 +75,12 @@ class DetectionMAPOp : public framework::OperatorWithKernel { if (ctx->HasInput("PosCount")) { PADDLE_ENFORCE( ctx->HasInput("TruePos"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(TruePos) of DetectionMAPOp should not be null when " "Input(PosCount) is not null.")); PADDLE_ENFORCE( ctx->HasInput("FalsePos"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(FalsePos) of DetectionMAPOp should not be null when " "Input(PosCount) is not null.")); } @@ -197,7 +197,7 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_NE( GetAPType(ap_type), APType::kNone, - platform::errors::InvalidArgument( + 
phi::errors::InvalidArgument( "The ap_type should be 'integral' or '11point.")); }); AddComment(R"DOC( diff --git a/paddle/fluid/operators/detection_map_op.h b/paddle/fluid/operators/detection_map_op.h index ccf0834968793..24fea9c431c63 100644 --- a/paddle/fluid/operators/detection_map_op.h +++ b/paddle/fluid/operators/detection_map_op.h @@ -82,12 +82,12 @@ class DetectionMAPOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( label_lod.size(), 1UL, - platform::errors::InvalidArgument("Only support LodTensor of lod_level " - "with 1 in label, but received %d.", - label_lod.size())); + phi::errors::InvalidArgument("Only support LodTensor of lod_level " + "with 1 in label, but received %d.", + label_lod.size())); PADDLE_ENFORCE_EQ(label_lod[0].size(), detect_lod[0].size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The batch_size of input(Label) and input(Detection) " "must be the same, but received %d:%d", label_lod[0].size(), @@ -212,7 +212,7 @@ class DetectionMAPOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( input_label.dims()[1], 5, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input label width" " must be 5, but received %d, please check your input data", input_label.dims()[1])); @@ -504,7 +504,7 @@ class DetectionMAPOpKernel : public framework::OpKernel { mAP += average_precisions; ++count; } else { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unkown ap version %s. 
Now only supports integral and l1point.", ap_type)); } diff --git a/paddle/fluid/operators/dlnne/dlnne_engine_op.cc b/paddle/fluid/operators/dlnne/dlnne_engine_op.cc index 86508bfbf2720..a09e8aaec8156 100644 --- a/paddle/fluid/operators/dlnne/dlnne_engine_op.cc +++ b/paddle/fluid/operators/dlnne/dlnne_engine_op.cc @@ -44,8 +44,8 @@ std::string ConvertType(phi::DataType type) { } default: { PADDLE_THROW( - platform::errors::Fatal("The DLNNE Calibration only support " - "float/float16/int32_t/int64_t input.")); + phi::errors::Fatal("The DLNNE Calibration only support " + "float/float16/int32_t/int64_t input.")); } } } @@ -66,8 +66,8 @@ int GetDataByte(phi::DataType type) { } default: { PADDLE_THROW( - platform::errors::Fatal("The DLNNE Calibration only support " - "float/float16/int32_t/int64_t input.")); + phi::errors::Fatal("The DLNNE Calibration only support " + "float/float16/int32_t/int64_t input.")); } } } @@ -93,7 +93,7 @@ void ConvertPaddle2Onnx(std::string onnx_file_name, PADDLE_ENFORCE_EQ( convert_flag, 0, - platform::errors::Unavailable("Convert paddle to onnx failed")); + phi::errors::Unavailable("Convert paddle to onnx failed")); } } @@ -108,10 +108,9 @@ void QuantizeOnnx(std::string onnx_file_name, << " --output-model " << rlym_file_name; LOG(INFO) << convert_cmd.str(); int convert_flag = system(convert_cmd.str().c_str()); - PADDLE_ENFORCE_EQ( - convert_flag, - 0, - platform::errors::Unavailable("Convert onnx to rlym failed")); + PADDLE_ENFORCE_EQ(convert_flag, + 0, + phi::errors::Unavailable("Convert onnx to rlym failed")); } if (!FileExists(quantized_rlym_file_name.c_str())) { @@ -121,9 +120,8 @@ void QuantizeOnnx(std::string onnx_file_name, << dataset_plugin_path << " " << rlym_file_name; LOG(INFO) << quantize_cmd.str(); int quantize_flag = system(quantize_cmd.str().c_str()); - PADDLE_ENFORCE_EQ(quantize_flag, - 0, - platform::errors::Unavailable("quantize model failed")); + PADDLE_ENFORCE_EQ( + quantize_flag, 0, phi::errors::Unavailable("quantize 
model failed")); } } diff --git a/paddle/fluid/operators/dlnne/dlnne_engine_op.h b/paddle/fluid/operators/dlnne/dlnne_engine_op.h index d0063c51512e3..363e21545c9ab 100644 --- a/paddle/fluid/operators/dlnne/dlnne_engine_op.h +++ b/paddle/fluid/operators/dlnne/dlnne_engine_op.h @@ -37,7 +37,7 @@ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/inference/analysis/helper.h" #include "paddle/fluid/inference/utils/io_utils.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" @@ -131,7 +131,7 @@ static phi::DataType DLNNE2FluidDataType(dl::nne::DataType type) { case dl::nne::DataType::kBOOL: return phi::DataType::BOOL; default: - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "unknown fluid datatype in Fluid op converter")); return phi::DataType::FLOAT32; } @@ -316,10 +316,9 @@ class DlnneEngineOp : public framework::OperatorBase { } builder = dl::nne::CreateInferBuilder(); - PADDLE_ENFORCE_NE( - builder, - nullptr, - platform::errors::Unavailable("nne create builder failed")); + PADDLE_ENFORCE_NE(builder, + nullptr, + phi::errors::Unavailable("nne create builder failed")); dl::nne::BuilderConfig builder_cfg; builder_cfg.max_batch_size = max_batch_size_; builder_cfg.ws_mode = weight_share_map[weight_share_mode_]; @@ -327,10 +326,9 @@ class DlnneEngineOp : public framework::OperatorBase { network = builder->CreateNetwork(); parser = dl::nne::CreateParser(); - PADDLE_ENFORCE_NE( - parser, - nullptr, - platform::errors::Unavailable("nne create parser failed")); + PADDLE_ENFORCE_NE(parser, + nullptr, + phi::errors::Unavailable("nne create parser failed")); if (dlnne_log_flag_) { LOG(INFO) << "set output for dlnne"; } @@ -402,7 +400,7 @@ class DlnneEngineOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( input_names_.empty(), false, - platform::errors::PreconditionNotMet( + 
phi::errors::PreconditionNotMet( "Dlnne engine needs at least one input, but no input is found. " "Please check if you set the input correctly.")); @@ -440,7 +438,7 @@ class DlnneEngineOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( first_batch, batch, - platform::errors::Unavailable( + phi::errors::Unavailable( "compute infer_batches is different from each other")); } infer_batch = first_batch; @@ -474,13 +472,13 @@ class DlnneEngineOp : public framework::OperatorBase { data_bytes = 4; dtype = 2; } else if (type == phi::DataType::FLOAT16) { - buffer = static_cast(t.data()); + buffer = static_cast(t.data()); data_bytes = 2; dtype = 3; } else { PADDLE_THROW( - platform::errors::Fatal("The DLNNE Engine OP only support " - "float/int32_t/int64_t/float16 input.")); + phi::errors::Fatal("The DLNNE Engine OP only support " + "float/int32_t/int64_t/float16 input.")); } input_buffers[bind_index] = buffer; @@ -555,7 +553,7 @@ class DlnneEngineOp : public framework::OperatorBase { auto *fluid_v = scope.FindVar(y); PADDLE_ENFORCE_NOT_NULL( fluid_v, - platform::errors::NotFound( + phi::errors::NotFound( "Output variable %s is not found in DLNNE subgraph.", y)); auto *fluid_t = fluid_v->GetMutable(); diff --git a/paddle/fluid/operators/dropout_op.cc b/paddle/fluid/operators/dropout_op.cc index 01df430f52161..d538164977277 100644 --- a/paddle/fluid/operators/dropout_op.cc +++ b/paddle/fluid/operators/dropout_op.cc @@ -71,7 +71,7 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker { .AddCustomChecker([](const float& drop_p) { PADDLE_ENFORCE_EQ(drop_p >= 0.0f && drop_p <= 1.0f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'dropout_prob' must be between 0.0 and 1.0.")); }) .SupportTensor(); @@ -100,7 +100,7 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( type == "downgrade_in_infer" || type == "upscale_in_train", true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( 
"dropout_implementation can only be downgrade_in_infer or " "upscale_in_train")); }); diff --git a/paddle/fluid/operators/elementwise/elementwise_op.h b/paddle/fluid/operators/elementwise/elementwise_op.h index d835caedbf3c8..00a5ca7a39d0e 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_op.h @@ -26,7 +26,7 @@ limitations under the License. */ #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif namespace paddle { @@ -44,7 +44,7 @@ class ElementwiseOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->GetInputsVarType("Y").front(), framework::proto::VarType::LOD_TENSOR, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input var's type should be phi::DenseTensor, but the " "received is %s [%s].", ctx->GetInputsVarType("Y").front(), @@ -55,7 +55,7 @@ class ElementwiseOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->GetInputDim("Y").size(), 1u, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "For elementwise_op, if X is Sparse(VarType.SELECTED_ROWS" "), Y must be scalar, the size of Y should be 1. " "But reveived the size of Y = %s.", @@ -63,14 +63,14 @@ class ElementwiseOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->GetInputDim("Y")[0], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "For elementwise_op, if X is Sparse(VarType.SELECTED_ROWS" "), Y must be scalar, the first dimension of Y should be 1. " "But reveived the first dimension of Y = %s.", ctx->GetInputDim("Y")[0])); } else if (ctx->GetInputsVarType("X").front() != framework::proto::VarType::LOD_TENSOR) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Input X's type[%s] is not supported by elementwise_op. 
Please set " "its type to LOD_TENSOR.", ctx->GetInputsVarType("X").front())); @@ -87,7 +87,7 @@ class ElementwiseOp : public framework::OperatorWithKernel { if (x_dims.size() == y_dims.size()) { PADDLE_ENFORCE_EQ((axis == -1) || (axis == 0), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "axis should be -1 or 0 while the dimension of " "tensor X (%s) is equal to the dimension of " "tensor Y (%s), but received axis: %s", @@ -97,7 +97,7 @@ class ElementwiseOp : public framework::OperatorWithKernel { } PADDLE_ENFORCE_EQ((axis >= (-1 * max_dim)) && (axis < max_dim), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The axis range must be [%s, %s), but axis is %s. " "Please set the axis again.", -1 * max_dim, diff --git a/paddle/fluid/operators/elementwise/elementwise_op_function.h b/paddle/fluid/operators/elementwise/elementwise_op_function.h index afa2df659c42a..3d0fe2ab399bc 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op_function.h +++ b/paddle/fluid/operators/elementwise/elementwise_op_function.h @@ -76,7 +76,7 @@ int PackTensorsIntoVector(const framework::ExecutionContext &ctx, auto x_var = ctx.InputVar("X"); PADDLE_ENFORCE_NOT_NULL( x_var, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Unable to get input Variable X, Variable name is %s.\n", ctx.InputName("X"))); auto *y = ctx.Input("Y"); @@ -89,13 +89,13 @@ int PackTensorsIntoVector(const framework::ExecutionContext &ctx, } else if (x_var->IsType()) { PADDLE_ENFORCE_EQ(y->dims().size() == 1 && y->dims()[0] == 1, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "For elementwise_op, if X is Sparse, Y must be " "scalar. 
But received the size of Y = %d.", y->dims().size())); PADDLE_ENFORCE_NOT_NULL( x_for_selectedrows, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The parameter x_for_selectedrows is excepted to " "be valid, once input variable X`s class type is " "SelectedRows.\n")); @@ -110,7 +110,7 @@ int PackTensorsIntoVector(const framework::ExecutionContext &ctx, z = ctx.Output("Out")->mutable_value(); ins->emplace_back(x_for_selectedrows); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "X's type[%s] is not supported by elementwise_op. X's type should be " "phi::DenseTensor or SelectedRows.", framework::ToTypeName(x_var->Type()))); @@ -1403,7 +1403,7 @@ void FusedElemwiseAndActGradComputeEx(const framework::ExecutionContext &ctx, if (UseIntermediateOut) { PADDLE_ENFORCE_NOT_NULL( intermediate_out, - platform::errors::InvalidArgument("Intermediate out is null pointer.")); + phi::errors::InvalidArgument("Intermediate out is null pointer.")); } if (x_dim == y_dim) { FusedElemwiseAndActGradComputeNoBroadcastIsType(), true, - platform::errors::InvalidArgument("XPU only support phi::DenseTensor, " - "Input(X) is not phi::DenseTensor")); + phi::errors::InvalidArgument("XPU only support phi::DenseTensor, " + "Input(X) is not phi::DenseTensor")); auto x = x_var->Get(); auto* y = ctx.Input("Y"); diff --git a/paddle/fluid/operators/enqueue_op.cc b/paddle/fluid/operators/enqueue_op.cc index c8279719789c4..225a2e067e190 100644 --- a/paddle/fluid/operators/enqueue_op.cc +++ b/paddle/fluid/operators/enqueue_op.cc @@ -52,14 +52,14 @@ class EnqueueOp : public framework::OperatorBase { auto* queue_holder_var = scope.FindVar(queue_name); PADDLE_ENFORCE_NOT_NULL( queue_holder_var, - platform::errors::NotFound( + phi::errors::NotFound( "No LoDTensorBlockingQueueHolder variable with name %s found.", queue_name)); const std::string& var_name = Input("X"); auto* in_var = scope.FindVar(var_name); - 
PADDLE_ENFORCE_NOT_NULL(in_var, - platform::errors::NotFound( - "No variable with name %s found.", var_name)); + PADDLE_ENFORCE_NOT_NULL( + in_var, + phi::errors::NotFound("No variable with name %s found.", var_name)); auto* in_tensor = in_var->GetMutable(); auto* queue_holder = queue_holder_var->template GetMutable(); diff --git a/paddle/fluid/operators/expand_as_v2_op.h b/paddle/fluid/operators/expand_as_v2_op.h index abc89ba75c671..a9dd1f08c385b 100644 --- a/paddle/fluid/operators/expand_as_v2_op.h +++ b/paddle/fluid/operators/expand_as_v2_op.h @@ -17,7 +17,7 @@ limitations under the License. */ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" +#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #define MAX_RANK_SUPPORTED 8 diff --git a/paddle/fluid/operators/expand_op.cc b/paddle/fluid/operators/expand_op.cc index bd558ee944359..4f57a35a1039e 100644 --- a/paddle/fluid/operators/expand_op.cc +++ b/paddle/fluid/operators/expand_op.cc @@ -36,7 +36,7 @@ class ExpandOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( static_cast(x_dims.size()), expand_times.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of elements (%d) of 'expand_times' for " "Op(expand) must be equal to the number of dimensions " "(%d) of the input.", @@ -45,7 +45,7 @@ class ExpandOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LE( x_dims.size(), MAX_RANK_SUPPORTED, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions of the input for Op(expand) " "must not be greater than %d, but the value received is %d.", MAX_RANK_SUPPORTED, @@ -59,7 +59,7 @@ class ExpandOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( expand_times[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The %uth element of 'expand_times' for 
Op(expand) must be " "greater than 0, but the value given is %d.", i, @@ -164,7 +164,7 @@ class ExpandGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims[0], out_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension size (%d) of Input(Out@GRAD) should be " "equal to the corresponding dimension size (%d) of Input(X)", out_dims[0], @@ -180,7 +180,7 @@ class ExpandGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims[i] * expand_times[i], out_dims[i], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The %uth dimension size (%d) of Input(Out@GRAD) should be " "equal to the multiplication of the corresponding dimension " "sizes of Input(X) (%d) and expand_times (%d).", @@ -285,19 +285,18 @@ REGISTER_OP_CPU_KERNEL(expand_grad, ops::ExpandGradKernel, ops::ExpandGradKernel); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -REGISTER_OP_CUDA_KERNEL( - expand, - ops::ExpandKernel, - ops::ExpandKernel, - ops::ExpandKernel, - ops::ExpandKernel, - ops::ExpandKernel, - ops::ExpandKernel); +REGISTER_OP_CUDA_KERNEL(expand, + ops::ExpandKernel, + ops::ExpandKernel, + ops::ExpandKernel, + ops::ExpandKernel, + ops::ExpandKernel, + ops::ExpandKernel); REGISTER_OP_CUDA_KERNEL( expand_grad, ops::ExpandGradKernel, ops::ExpandGradKernel, - ops::ExpandGradKernel, + ops::ExpandGradKernel, ops::ExpandGradKernel, ops::ExpandGradKernel); #endif diff --git a/paddle/fluid/operators/expand_op.h b/paddle/fluid/operators/expand_op.h index 3d9fbe883b31b..3d539cbf0c944 100644 --- a/paddle/fluid/operators/expand_op.h +++ b/paddle/fluid/operators/expand_op.h @@ -19,7 +19,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" +#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #define MAX_RANK_SUPPORTED 8 @@ -97,14 +97,14 @@ class ExpandKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( rank, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions of the input 'x' for Op(expand) " "must be greater than or equal to 1, but the value received is %d.", rank)); PADDLE_ENFORCE_LE( rank, MAX_RANK_SUPPORTED, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions of the input 'x' for Op(expand) " "must be less than or equal to %d, but the value received is %d.", MAX_RANK_SUPPORTED, @@ -146,7 +146,7 @@ class ExpandKernel : public framework::OpKernel { auto expand_times = get_expand_times(context); PADDLE_ENFORCE_EQ(static_cast(in_dims.size()), expand_times.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of elements (%d) of 'expand_times' for " "Op(expand) must be equal to the number " "of dimensions (%d) of the input.", @@ -172,10 +172,10 @@ class ExpandKernel : public framework::OpKernel { // use 32-bit index to speed up bool use_32bit_index = y.size() < Eigen::NumTraits::highest(); if (use_32bit_index) { - EigenBroadcast, T, Rank>::Eval( + phi::funcs::EigenBroadcast, T, Rank>::Eval( place, To32BitIndex(y), To32BitIndex(x), bcast_dims); } else { - EigenBroadcast, T, Rank>::Eval( + phi::funcs::EigenBroadcast, T, Rank>::Eval( place, y, x, bcast_dims); } } @@ -222,7 +222,7 @@ class ExpandGradKernel : public framework::OpKernel { } else { PADDLE_ENFORCE_GE(dims, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions of the input " "'Out@GRAD' for Op(expand_grad)" " must be greater than or equal to 1, but " @@ -230,7 +230,7 @@ 
class ExpandGradKernel : public framework::OpKernel { dims)); PADDLE_ENFORCE_LE(dims, MAX_RANK_SUPPORTED, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions of the input 'Out@GRAD' " "for Op(expand_grad) must be less than or equal " "to %d, but the value received is %d.", @@ -262,7 +262,7 @@ class ExpandGradKernel : public framework::OpKernel { ExpandBackward<8>(context, reshape_dims_vec, reduce_dims_vec); break; default: - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Only support tensor with rank being between 1 and %d. But " "received tensor's rank = %d.", MAX_RANK_SUPPORTED, @@ -280,14 +280,14 @@ class ExpandGradKernel : public framework::OpKernel { size_t reduce_size = reduce_dims_vec.size(); PADDLE_ENFORCE_EQ(reshape_size, reshape_dims_vec.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inconsistent size between template Dims (%d) and " "reshape dimensions (%d).", reshape_size, reshape_dims_vec.size())); PADDLE_ENFORCE_EQ(reduce_size, reduce_dims_vec.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inconsistent size between template Dims (%d) and " "reduce dimensions (%d).", reduce_size, @@ -307,8 +307,8 @@ class ExpandGradKernel : public framework::OpKernel { auto out_grad = EigenVector::Flatten(*in0); auto& place = *context.template device_context().eigen_device(); - EigenBroadcastGrad, T, Dims>::Eval( - place, x_grad, out_grad, reduce_dims, reshape_dims); + phi::funcs::EigenBroadcastGrad, T, Dims>:: + Eval(place, x_grad, out_grad, reduce_dims, reshape_dims); } }; diff --git a/paddle/fluid/operators/expand_v2_op.h b/paddle/fluid/operators/expand_v2_op.h index b61cf2dc485e5..57013d5eb8bd1 100644 --- a/paddle/fluid/operators/expand_v2_op.h +++ b/paddle/fluid/operators/expand_v2_op.h @@ -20,7 +20,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" +#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #define MAX_RANK_SUPPORTED 8 diff --git a/paddle/fluid/operators/fake_dequantize_op.cc b/paddle/fluid/operators/fake_dequantize_op.cc index ee64d5e1c5cc5..e527ae2d876e9 100644 --- a/paddle/fluid/operators/fake_dequantize_op.cc +++ b/paddle/fluid/operators/fake_dequantize_op.cc @@ -232,12 +232,12 @@ class FakeChannelWiseDequantizeMaxAbsOpMaker "and mul, the quant_axis is equal to the cout axis.") .SetDefault(0) .AddCustomChecker([](const int& quant_axis) { - PADDLE_ENFORCE_EQ(quant_axis == 0 || quant_axis == 1, - true, - platform::errors::InvalidArgument( - "'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + PADDLE_ENFORCE_EQ( + quant_axis == 0 || quant_axis == 1, + true, + phi::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); }); AddAttr("x_num_col_dims", "The x_num_col_dims of mul. Only used for mul or matmul.") @@ -245,7 +245,7 @@ class FakeChannelWiseDequantizeMaxAbsOpMaker .AddCustomChecker([](const int& x_num_col_dims) { PADDLE_ENFORCE_EQ(x_num_col_dims == 0, false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'x_num_col_dims' should be larger than 0, but " "the received is %d", x_num_col_dims)); diff --git a/paddle/fluid/operators/fake_dequantize_op.cu b/paddle/fluid/operators/fake_dequantize_op.cu index ea069daa40d7d..1e9c28661e23c 100644 --- a/paddle/fluid/operators/fake_dequantize_op.cu +++ b/paddle/fluid/operators/fake_dequantize_op.cu @@ -16,7 +16,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/fake_dequantize_op.cu.h" namespace ops = paddle::operators; -using float16 = paddle::platform::float16; +using float16 = phi::dtype::float16; PD_REGISTER_STRUCT_KERNEL(fake_dequantize_max_abs, GPU, diff --git a/paddle/fluid/operators/fake_dequantize_op.h b/paddle/fluid/operators/fake_dequantize_op.h index 57887721308d4..420996e878b76 100644 --- a/paddle/fluid/operators/fake_dequantize_op.h +++ b/paddle/fluid/operators/fake_dequantize_op.h @@ -82,7 +82,7 @@ class FakeChannelWiseDequantizeMaxAbsKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( scales[0]->numel(), in->dims()[quant_axis], - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The number of first scale values must be the same with " "quant_axis dimension value of Input(X) when the `Scales` has " "only one element, but %ld != %ld here.", @@ -93,7 +93,7 @@ class FakeChannelWiseDequantizeMaxAbsKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( scales[0]->numel(), in->dims()[x_num_col_dims], - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The number of first scale values must be the same with " "corresponding dimension value of Input(X) when the `Scales` " "has two elements, but %ld != %ld here.", @@ -101,7 +101,7 @@ class FakeChannelWiseDequantizeMaxAbsKernel : public framework::OpKernel { in->dims()[1])); PADDLE_ENFORCE_EQ(scales[1]->numel(), 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The second scale tensor should only have one " "value at now, but it has %ld values here.", scales[1]->numel())); diff --git a/paddle/fluid/operators/fake_quantize_op.cc b/paddle/fluid/operators/fake_quantize_op.cc index a5169892187a2..d7d9a1416d919 100644 --- a/paddle/fluid/operators/fake_quantize_op.cc +++ b/paddle/fluid/operators/fake_quantize_op.cc @@ -54,9 +54,9 @@ struct FindChannelAbsMaxFunctor { PADDLE_ENFORCE_EQ( quant_axis == 0 || quant_axis == 1, true, - 
platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + phi::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); auto *in_data = in_tensor.data(); auto in_dims = in_tensor.dims(); const int64_t channel = in_dims[quant_axis]; @@ -167,9 +167,9 @@ struct ChannelClipAndFakeQuantFunctor { PADDLE_ENFORCE_EQ( quant_axis == 0 || quant_axis == 1, true, - platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + phi::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); auto *scale_data = scale.data(); auto *in_data = in.data(); auto *out_data = out->mutable_data(ctx.GetPlace()); @@ -247,9 +247,9 @@ struct ChannelClipFakeQuantDequantFunctor { PADDLE_ENFORCE_EQ( quant_axis == 0 || quant_axis == 1, true, - platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + phi::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); auto *scale_data = scale.data(); auto *in_data = in.data(); @@ -426,7 +426,7 @@ class FakeQuantOrWithDequantAbsMaxOpMaker .AddCustomChecker([](const int &bit_length) { PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'bit_length' should be between 1 and 16, but " "the received is %d", bit_length)); @@ -493,19 +493,19 @@ class FakeChannelWiseQuantizeAbsMaxOpMaker "and mul, the quant_axis is equal to the cout axis.") .SetDefault(0) .AddCustomChecker([](const int &quant_axis) { - PADDLE_ENFORCE_EQ(quant_axis == 0 || quant_axis == 1, - true, - platform::errors::InvalidArgument( - "'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + PADDLE_ENFORCE_EQ( + quant_axis == 0 || quant_axis == 1, + true, + phi::errors::InvalidArgument("'quant_axis' should 
be 0 or 1, but " + "the received is %d", + quant_axis)); }); AddAttr("bit_length", "(int, default 8)") .SetDefault(8) .AddCustomChecker([](const int &bit_length) { PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'bit_length' should be between 1 and 16, but " "the received is %d", bit_length)); @@ -574,19 +574,19 @@ class FakeChannelWiseQuantizeDequantizeAbsMaxOpMaker "and mul, the quant_axis is equal to the cout axis.") .SetDefault(0) .AddCustomChecker([](const int &quant_axis) { - PADDLE_ENFORCE_EQ(quant_axis == 0 || quant_axis == 1, - true, - platform::errors::InvalidArgument( - "'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + PADDLE_ENFORCE_EQ( + quant_axis == 0 || quant_axis == 1, + true, + phi::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); }); AddAttr("bit_length", "(int, default 8)") .SetDefault(8) .AddCustomChecker([](const int &bit_length) { PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'bit_length' should be between 1 and 16, but " "the received is %d", bit_length)); @@ -654,7 +654,7 @@ class FakeQuantizeRangeAbsMaxOpMaker .AddCustomChecker([](const int &bit_length) { PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'bit_length' should be between 1 and 16, but " "the received is %d", bit_length)); @@ -735,7 +735,7 @@ class FakeQuantOrWithDequantMovingAverageAbsMaxOpMaker .AddCustomChecker([](const int &bit_length) { PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'bit_length' should be between 1 and 16, but " "the received is %d", bit_length)); diff --git a/paddle/fluid/operators/fake_quantize_op.cu b/paddle/fluid/operators/fake_quantize_op.cu 
index 68ceaca46d04f..240fd119ff09a 100644 --- a/paddle/fluid/operators/fake_quantize_op.cu +++ b/paddle/fluid/operators/fake_quantize_op.cu @@ -16,7 +16,7 @@ limitations under the License. */ #include "paddle/fluid/operators/fake_quantize_op.cu.h" namespace ops = paddle::operators; -using float16 = paddle::platform::float16; +using float16 = phi::dtype::float16; PD_REGISTER_STRUCT_KERNEL(fake_quantize_abs_max, GPU, diff --git a/paddle/fluid/operators/fake_quantize_op.cu.h b/paddle/fluid/operators/fake_quantize_op.cu.h index bdf8a80debb64..cb2f498c22b0b 100644 --- a/paddle/fluid/operators/fake_quantize_op.cu.h +++ b/paddle/fluid/operators/fake_quantize_op.cu.h @@ -31,7 +31,7 @@ struct QuantizeDataType { }; template <> -struct QuantizeDataType { +struct QuantizeDataType { using type = float; }; @@ -92,7 +92,7 @@ struct FindAbsMaxFunctor { }; template struct FindAbsMaxFunctor; -template struct FindAbsMaxFunctor; +template struct FindAbsMaxFunctor; template __global__ void FindChannelAbsMaxKernelQuantAxis0(const T *in, @@ -172,9 +172,9 @@ struct FindChannelAbsMaxFunctor { PADDLE_ENFORCE_EQ( quant_axis == 0 || quant_axis == 1, true, - platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + phi::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); const int num = in_tensor.numel(); auto in_dims = in_tensor.dims(); const T *in_data = in_tensor.data(); @@ -419,9 +419,9 @@ struct ChannelClipAndFakeQuantFunctor { PADDLE_ENFORCE_EQ( quant_axis == 0 || quant_axis == 1, true, - platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + phi::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); int64_t num = in.numel(); auto in_dims = in.dims(); @@ -665,9 +665,9 @@ struct ChannelClipFakeQuantDequantFunctor { PADDLE_ENFORCE_EQ( quant_axis == 0 || quant_axis == 1, true, - 
platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + phi::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); int num = in.numel(); auto in_dims = in.dims(); diff --git a/paddle/fluid/operators/fake_quantize_op.h b/paddle/fluid/operators/fake_quantize_op.h index 6387018d1865e..39af6b5d5dec2 100644 --- a/paddle/fluid/operators/fake_quantize_op.h +++ b/paddle/fluid/operators/fake_quantize_op.h @@ -453,11 +453,11 @@ class StraightThroughEstimatorGradKernel : public framework::OpKernel { context.Input(framework::GradVarName("Out")); auto x_grad_name = framework::GradVarName("X"); auto *d_x = context.Output(x_grad_name); - PADDLE_ENFORCE_NOT_NULL(d_x, - platform::errors::PreconditionNotMet( - "StraightThroughEstimatorGradKernel " - "doesn't have the output named %s.", - x_grad_name)); + PADDLE_ENFORCE_NOT_NULL( + d_x, + phi::errors::PreconditionNotMet("StraightThroughEstimatorGradKernel " + "doesn't have the output named %s.", + x_grad_name)); // Initialize dx as same as d_out d_x->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/fill_constant_op.cc b/paddle/fluid/operators/fill_constant_op.cc index 8a27649af864b..730ba969c779f 100644 --- a/paddle/fluid/operators/fill_constant_op.cc +++ b/paddle/fluid/operators/fill_constant_op.cc @@ -33,7 +33,7 @@ class FillConstantOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( shape[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Each value of attribute 'shape' is expected to be no less " "than 0. 
But received: shape[%u] = %d; shape = [%s].", i, @@ -96,7 +96,7 @@ class FillConstantOp : public framework::OperatorWithKernel { kt.set_backend(phi::Backend::XPU); break; default: - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Could NOT determine the place of variable, place_type = %d .", place_type)); } diff --git a/paddle/fluid/operators/fill_zeros_like_op.cu.cc b/paddle/fluid/operators/fill_zeros_like_op.cu.cc index c00d23928a70c..e398e94e4ba09 100644 --- a/paddle/fluid/operators/fill_zeros_like_op.cu.cc +++ b/paddle/fluid/operators/fill_zeros_like_op.cu.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/complex.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace ops = paddle::operators; namespace plat = paddle::platform; @@ -29,7 +29,7 @@ PD_REGISTER_STRUCT_KERNEL(fill_zeros_like, int64_t, float, double, - plat::float16, + phi::dtype::float16, bool, plat::complex, plat::complex) {} @@ -42,7 +42,7 @@ PD_REGISTER_STRUCT_KERNEL(fill_zeros_like2, int64_t, float, double, - plat::float16, + phi::dtype::float16, bool, plat::complex, plat::complex) {} diff --git a/paddle/fluid/operators/flatten_op.cc b/paddle/fluid/operators/flatten_op.cc index 48c1a23b8591d..b6b67e12fd24d 100644 --- a/paddle/fluid/operators/flatten_op.cc +++ b/paddle/fluid/operators/flatten_op.cc @@ -43,12 +43,12 @@ class Flatten2Op : public framework::OperatorWithKernel { const auto &in_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_GE(axis, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The axis should be greater than or equal to 0.")); PADDLE_ENFORCE_LE( axis, in_dims.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The axis should be less than or equal to input tensor's rank")); const auto &out_dims = Flatten2Op::GetOutputShape(axis, in_dims); diff --git 
a/paddle/fluid/operators/fsp_op.cc b/paddle/fluid/operators/fsp_op.cc index d2c1d2c45d685..c447f9d485f5c 100644 --- a/paddle/fluid/operators/fsp_op.cc +++ b/paddle/fluid/operators/fsp_op.cc @@ -34,28 +34,28 @@ class FSPOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 4UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(X) must have shape [batch_size, channel, height, width]." "Now the dimension of 'X' is %d.", x_dims.size())); PADDLE_ENFORCE_EQ( y_dims.size(), 4UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Y) must have shape [batch_size, channel, height, width]." "Now the dimension of 'Y' is %d.", y_dims.size())); PADDLE_ENFORCE_EQ( x_dims[2], y_dims[2], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(X)(%d) and Input(Y)(%d) should have the same height.", x_dims[2], y_dims[2])); PADDLE_ENFORCE_EQ( x_dims[3], y_dims[3], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(X)(%d) and Input(Y)(%d) should have the same width.", x_dims[3], y_dims[3])); diff --git a/paddle/fluid/operators/fused/attn_bias_add.cu.h b/paddle/fluid/operators/fused/attn_bias_add.cu.h index 8ea1e11cd29f4..2f1847d951058 100644 --- a/paddle/fluid/operators/fused/attn_bias_add.cu.h +++ b/paddle/fluid/operators/fused/attn_bias_add.cu.h @@ -170,7 +170,7 @@ void LaunchBiasAddFwKernel(const phi::GPUContext& ctx, break; } default: { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unsupported vectorized size: %d !", vec_size)); break; } diff --git a/paddle/fluid/operators/fused/attn_feed_forward.h b/paddle/fluid/operators/fused/attn_feed_forward.h index 77339f1fa0d64..25ba1cc13ead2 100644 --- a/paddle/fluid/operators/fused/attn_feed_forward.h +++ b/paddle/fluid/operators/fused/attn_feed_forward.h @@ -15,7 +15,7 @@ limitations under the License. 
*/ #pragma once #include "paddle/fluid/operators/fused/attn_bias_add.cu.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/kernels/funcs/blas/blas.h" namespace paddle { diff --git a/paddle/fluid/operators/fused/attn_gemm_int8.h b/paddle/fluid/operators/fused/attn_gemm_int8.h index 8dc4810b1f3b9..a6865649b26ae 100644 --- a/paddle/fluid/operators/fused/attn_gemm_int8.h +++ b/paddle/fluid/operators/fused/attn_gemm_int8.h @@ -19,8 +19,8 @@ limitations under the License. */ #include "paddle/fluid/operators/fused/cublaslt.h" #include "paddle/fluid/operators/fused/quant_dequant_kernel.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" -#include "paddle/fluid/platform/float16.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/kernels/funcs/broadcast_function.h" #include "paddle/phi/kernels/funcs/elementwise_functor.h" @@ -87,12 +87,12 @@ class AttnMatmulINT8 { std::vector outs = {bias_out}; phi::funcs::BroadcastKernel( dev_ctx_, ins, &outs, phi::funcs::AddFunctor()); - PADDLE_ENFORCE_EQ(cudaGetLastError(), - cudaSuccess, - platform::errors::Fatal( - "cuda error occurred after computing bias. " - "But it does not mean this error is caused by " - "bias computing")); + PADDLE_ENFORCE_EQ( + cudaGetLastError(), + cudaSuccess, + phi::errors::Fatal("cuda error occurred after computing bias. " + "But it does not mean this error is caused by " + "bias computing")); } } @@ -141,12 +141,12 @@ class AttnMatmulINT8 { std::vector outs = {bias_out}; phi::funcs::BroadcastKernel( dev_ctx_, ins, &outs, phi::funcs::AddFunctor()); - PADDLE_ENFORCE_EQ(cudaGetLastError(), - cudaSuccess, - platform::errors::Fatal( - "cuda error occurred after computing bias. " - "But it does not mean this error is caused by " - "bias computing")); + PADDLE_ENFORCE_EQ( + cudaGetLastError(), + cudaSuccess, + phi::errors::Fatal("cuda error occurred after computing bias. 
" + "But it does not mean this error is caused by " + "bias computing")); } } diff --git a/paddle/fluid/operators/fused/cublaslt.h b/paddle/fluid/operators/fused/cublaslt.h index e9728c58b55dc..e3f96b9ec1d3d 100644 --- a/paddle/fluid/operators/fused/cublaslt.h +++ b/paddle/fluid/operators/fused/cublaslt.h @@ -54,7 +54,7 @@ class CublasLtHelper { PADDLE_ENFORCE_EQ( status, CUBLAS_STATUS_SUCCESS, - platform::errors::External( + phi::errors::External( "cublasLtMatrixLayoutCreate execution error" "refer https://docs.nvidia.com/cuda/cublas/index.html to get more " "information")); @@ -69,7 +69,7 @@ class CublasLtHelper { PADDLE_ENFORCE_EQ( status, CUBLAS_STATUS_SUCCESS, - platform::errors::External( + phi::errors::External( "cublasLtMatmulDescCreate execution error" "refer https://docs.nvidia.com/cuda/cublas/index.html to get more " "information")); @@ -81,7 +81,7 @@ class CublasLtHelper { PADDLE_ENFORCE_EQ( status, CUBLAS_STATUS_SUCCESS, - platform::errors::External( + phi::errors::External( "cublasLtMatmulDescSetAttribute execution error" "refer https://docs.nvidia.com/cuda/cublas/index.html to get more " "information")); @@ -91,7 +91,7 @@ class CublasLtHelper { PADDLE_ENFORCE_EQ( status, CUBLAS_STATUS_SUCCESS, - platform::errors::External( + phi::errors::External( "cublasLtMatrixLayoutCreate execution error" "refer https://docs.nvidia.com/cuda/cublas/index.html to get more " "information")); @@ -100,7 +100,7 @@ class CublasLtHelper { PADDLE_ENFORCE_EQ( status, CUBLAS_STATUS_SUCCESS, - platform::errors::External( + phi::errors::External( "cublasLtMatrixLayoutCreate execution error" "refer https://docs.nvidia.com/cuda/cublas/index.html to get more " "information")); @@ -109,7 +109,7 @@ class CublasLtHelper { PADDLE_ENFORCE_EQ( status, CUBLAS_STATUS_SUCCESS, - platform::errors::External( + phi::errors::External( "cublasLtMatrixLayoutCreate execution error" "refer https://docs.nvidia.com/cuda/cublas/index.html to get more " "information")); @@ -212,7 +212,7 @@ class 
CublasLtHelper { PADDLE_ENFORCE_EQ( status, CUBLAS_STATUS_SUCCESS, - platform::errors::External( + phi::errors::External( "cublasLtMatmul execution error" "refer https://docs.nvidia.com/cuda/cublas/index.html to get more " "information")); diff --git a/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h b/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h index 8f3b5e4f09a06..5fb6f38b4c682 100644 --- a/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h +++ b/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h @@ -39,7 +39,7 @@ struct BNStatsFinalizeArgs { PADDLE_ENFORCE_EQ( param_shape.size(), 4U, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of param_shape is expected to 4. But received " "param_shape's size is %d, param_shape is [%s].", param_shape.size(), @@ -160,11 +160,11 @@ class CudnnBNStatsFinalize { CUDNN_BATCHNORM_SPATIAL_PERSISTENT); // Check workspace size, also creates plan. size_t workspace_size_bytes = train_op_.GetWorkspaceSizeInBytes(handle); - PADDLE_ENFORCE_EQ(workspace_size_bytes, - 0U, - platform::errors::InvalidArgument( - "Unexpected non-zero workspace size for " - "CudnnBNStatsFinalize.")); + PADDLE_ENFORCE_EQ( + workspace_size_bytes, + 0U, + phi::errors::InvalidArgument("Unexpected non-zero workspace size for " + "CudnnBNStatsFinalize.")); train_op_.SetOpVariantParamAttrPtr(CUDNN_PTR_WORKSPACE, static_cast(nullptr)); train_op_.SetOpVariantParamAttrPtr(CUDNN_PTR_WORKSPACE, @@ -192,11 +192,11 @@ class CudnnBNStatsFinalize { CUDNN_BATCHNORM_SPATIAL_PERSISTENT); // Check workspace size, also creates plan. 
size_t workspace_size_bytes = inference_op_.GetWorkspaceSizeInBytes(handle); - PADDLE_ENFORCE_EQ(workspace_size_bytes, - 0U, - platform::errors::InvalidArgument( - "Unexpected non-zero workspace size for " - "CudnnBNStatsFinalize.")); + PADDLE_ENFORCE_EQ( + workspace_size_bytes, + 0U, + phi::errors::InvalidArgument("Unexpected non-zero workspace size for " + "CudnnBNStatsFinalize.")); inference_op_.SetOpVariantParamAttrPtr(CUDNN_PTR_WORKSPACE, static_cast(nullptr)); inference_op_.SetOpVariantParamAttrPtr(CUDNN_PTR_WORKSPACE, diff --git a/paddle/fluid/operators/fused/cudnn_fusion_helper.h b/paddle/fluid/operators/fused/cudnn_fusion_helper.h index 7b738383f6ac7..f1df14c4f60de 100644 --- a/paddle/fluid/operators/fused/cudnn_fusion_helper.h +++ b/paddle/fluid/operators/fused/cudnn_fusion_helper.h @@ -52,7 +52,7 @@ class CudnnFusionOp { PADDLE_ENFORCE_EQ( plan_created_, true, - platform::errors::Fatal( + phi::errors::Fatal( "CudnnFusionOp exec requested without a valid 'plan', need: " ", GetWorkspaceSizeBytes(), Execute().")); PADDLE_ENFORCE_GPU_SUCCESS( diff --git a/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h b/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h index 9dbb8a8eaebc8..5d0e6c44c4e63 100644 --- a/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h +++ b/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h @@ -55,7 +55,7 @@ struct NormConvolutionArgs { PADDLE_ENFORCE_EQ( input_shape.size(), 4U, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of input_shape is expected to 4. But received " "input_shape's size is %d, input_shape is [%s].", input_shape.size(), @@ -63,7 +63,7 @@ struct NormConvolutionArgs { PADDLE_ENFORCE_EQ( filter_shape.size(), 4U, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of filter_shape is expected to 4. 
But received " "filter_shape's size is %d, filter_shape is [%s].", filter_shape.size(), @@ -71,13 +71,13 @@ struct NormConvolutionArgs { PADDLE_ENFORCE_EQ(filter_shape[1] == filter_shape[2] && (filter_shape[1] == 1 || filter_shape[1] == 3), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The filter_shape is expected to store as nhwc, and " "h = w = 1 or 3. But received filter_shape is [%s].", common::make_ddim(filter_shape))); PADDLE_ENFORCE_EQ((filter_shape[0] % 32 == 0 && filter_shape[3] % 8 == 0), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input channel is expected to be multiple of 8, " "and the output channel is expected to be multiple " "of 32. But received input channel is %d, output " @@ -87,7 +87,7 @@ struct NormConvolutionArgs { PADDLE_ENFORCE_EQ( output_shape.size(), 4U, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of output_shape is expected to 4. But received " "filter_shape's size is %d, filter_shape is [%s].", output_shape.size(), @@ -96,7 +96,7 @@ struct NormConvolutionArgs { PADDLE_ENFORCE_EQ( is_support, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Current test is only supported in the platforms with " "compatiblity greater than or equal to 70 and the kernel size " "must be equal to 1 or 3. When the kernel size is 1, " diff --git a/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h b/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h index 8b731e2c55408..7f47ea40e6cea 100644 --- a/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h +++ b/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h @@ -43,7 +43,7 @@ struct ScaleBiasAddReluArgs { PADDLE_ENFORCE_EQ( data_shape.size(), 4U, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of data_shape is expected to 4. 
But received " "data_shape's size is %d, data_shape is [%s].", data_shape.size(), @@ -51,7 +51,7 @@ struct ScaleBiasAddReluArgs { PADDLE_ENFORCE_EQ( param_shape.size(), 4U, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of param_shape is expected to 4. But received " "param_shape's size is %d, param_shape is [%s].", param_shape.size(), @@ -59,7 +59,7 @@ struct ScaleBiasAddReluArgs { PADDLE_ENFORCE_EQ( bitmask_shape.size(), 3U, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of bitmask_shape is expected to 3. But received " "bitmask_shape's size is %d, bitmask_shape is [%s].", bitmask_shape.size(), @@ -76,7 +76,7 @@ struct ScaleBiasAddReluArgs { PADDLE_ENFORCE_EQ( act_type, "relu", - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only relu activation supported in normalized convolution.")); mode = CUDNN_ACTIVATION_RELU; } diff --git a/paddle/fluid/operators/fused/fmha_ref.h b/paddle/fluid/operators/fused/fmha_ref.h index 843b5009a6fcc..2a43eea07535a 100644 --- a/paddle/fluid/operators/fused/fmha_ref.h +++ b/paddle/fluid/operators/fused/fmha_ref.h @@ -118,7 +118,7 @@ void InvokeTransposeRemovePadding(const phi::GPUContext& dev_ctx, PADDLE_ENFORCE_EQ( head_dim % PackSize, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "dim_head=%d must be divisible by vec_size=%d", head_dim, PackSize)); const int32_t pack_num = elem_cnt / PackSize; const int32_t block_size = 128; @@ -666,7 +666,7 @@ class FMHARef { dev_ctx_, qk_out_grad_tensor); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Only used for the backward elementwise_add op when" "dy is not needed and dx is not reduce")); return; diff --git a/paddle/fluid/operators/fused/fused_attention_op.cc b/paddle/fluid/operators/fused/fused_attention_op.cc index 8ae1a60ad3b94..d46265de1b354 100644 --- a/paddle/fluid/operators/fused/fused_attention_op.cc +++ 
b/paddle/fluid/operators/fused/fused_attention_op.cc @@ -124,7 +124,7 @@ class FusedAttentionOp : public framework::OperatorWithKernel { if (transpose_qkv_wb) { PADDLE_ENFORCE_EQ(y_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of qkv_weight must be 2 if enable" "transpose_qkv_wb: (dim_embed, 3 * dim_embed)," "but received dimensions of" @@ -132,13 +132,13 @@ class FusedAttentionOp : public framework::OperatorWithKernel { y_dim.size())); PADDLE_ENFORCE_GT(num_heads, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The num_heads must be provided and greater than 0 " "if enable transpose_qkv_wb, but we got %d.", num_heads)); PADDLE_ENFORCE_EQ(y_dim[0] % num_heads, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "First dim of qkv_w must be divisible by num heads " "if enable transpose_qkv_wb, but receive first " "dim of qkv_w is %d and num_heads is %d.", @@ -147,7 +147,7 @@ class FusedAttentionOp : public framework::OperatorWithKernel { if (ctx->Attrs().Get("ring_id") == -1) { PADDLE_ENFORCE_EQ(y_dim[0] * 3, y_dim[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of qkv_weight must be 2" "(dim_embed, 3 * dim_embed).")); } else { @@ -159,21 +159,21 @@ class FusedAttentionOp : public framework::OperatorWithKernel { } else { PADDLE_ENFORCE_EQ(y_dim.size(), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of qkv_weight must be 4 if not" "enable transpose_qkv_wb: (3, num_head, dim_head, " "dim_embed), but received [%d]", y_dim.size())); PADDLE_ENFORCE_EQ(y_dim[0], 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "First dim of qkv_w must be 3 if disable " "transpose_qkv_wb, but we got %d.", y_dim[0])); if (ctx->Attrs().Get("ring_id") == -1) { PADDLE_ENFORCE_EQ(y_dim[1] * y_dim[2], y_dim[3], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions 
of qkv_weight must be 4" "(3, num_head, dim_head, dim_embed)," "and must satisfy the limitations: " @@ -186,15 +186,15 @@ class FusedAttentionOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dim.size(), 3, - platform::errors::InvalidArgument("The dimensions of x must be 3" - "(batch_size, seq_len, dim_embed)," - "but received dimensions of" - "Input is [%d]", - x_dim.size())); + phi::errors::InvalidArgument("The dimensions of x must be 3" + "(batch_size, seq_len, dim_embed)," + "but received dimensions of" + "Input is [%d]", + x_dim.size())); PADDLE_ENFORCE_EQ(x_dim[2], hidden_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the dimension of x_dim[2] and y_dim[3] " "(y_dim[1] if enable transpose_qkv_w) " "must be equal. But received: the shape " @@ -245,23 +245,23 @@ class FusedAttentionOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( c_dim.size(), 5, - paddle::platform::errors::InvalidArgument( - "The CacheKV must be 5 dims, but got %d", c_dim.size())); + phi::errors::InvalidArgument("The CacheKV must be 5 dims, but got %d", + c_dim.size())); PADDLE_ENFORCE_EQ(c_dim[0], 2, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dim of CacheKV must be 2, but got %d", c_dim[0])); // 2 PADDLE_ENFORCE_EQ(c_dim[1], x_dim[0], - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dim of CacheKV must be equal with " "batch size %d, but got %d", x_dim[0], c_dim[1])); // batch_size PADDLE_ENFORCE_EQ(c_dim[2], num_heads, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The third dim of CacheKV must be equal with num " "head %d, but got %d", num_heads, @@ -272,14 +272,14 @@ class FusedAttentionOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( c_dim[3], 0, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The forth dim of CacheKV must be greater than 0, but got %d", 
c_dim[3])); // cache_seq_len } PADDLE_ENFORCE_EQ(c_dim[4], dim_head, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The fifth dim of CacheKV must be equal with head " "size %d, but got %d", dim_head, @@ -400,7 +400,7 @@ class FusedAttentionOpMaker : public framework::OpProtoAndCheckerMaker { .AddCustomChecker([](const float &epsilon) { PADDLE_ENFORCE_EQ(epsilon >= 0.0f && epsilon <= 0.001f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'epsilon' in Op(LayerNorm) should be between" "0.0 and 0.001, But received [%s].", epsilon)); @@ -413,7 +413,7 @@ class FusedAttentionOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( drop_p >= 0.0f && drop_p <= 1.0f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'attn_dropout_rate' must be between 0.0 and 1.0.")); }); AddAttr("is_test", @@ -449,7 +449,7 @@ class FusedAttentionOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( type == "downgrade_in_infer" || type == "upscale_in_train", true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "dropout_implementation can only be downgrade_in_infer or " "upscale_in_train")); }); @@ -459,7 +459,7 @@ class FusedAttentionOpMaker : public framework::OpProtoAndCheckerMaker { .AddCustomChecker([](const float &drop_p) { PADDLE_ENFORCE_EQ(drop_p >= 0.0f && drop_p <= 1.0f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'dropout_rate' must be between 0.0 and 1.0.")); }); AddAttr("dropout_fix_seed", @@ -479,7 +479,7 @@ class FusedAttentionOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( type == "downgrade_in_infer" || type == "upscale_in_train", true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "dropout_implementation can only be downgrade_in_infer or " "upscale_in_train")); }); @@ -489,7 +489,7 @@ class FusedAttentionOpMaker : public framework::OpProtoAndCheckerMaker { 
.AddCustomChecker([](const float &ln_epsilon) { PADDLE_ENFORCE_EQ(ln_epsilon >= 0.0f && ln_epsilon <= 0.001f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'epsilon' of the second LayerNorm in Fused " "attention op should be between" "0.0 and 0.001, But received [%s].", @@ -540,7 +540,7 @@ class FusedAttentionGradOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE_EQ(ctx->Attrs().Get("is_test"), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GradOp is only callable when is_test is false")); if (ctx->Attrs().Get("pre_layer_norm") == false) { diff --git a/paddle/fluid/operators/fused/fused_attention_utils.h b/paddle/fluid/operators/fused/fused_attention_utils.h index 39eb4c821e00a..18e3a513b3053 100644 --- a/paddle/fluid/operators/fused/fused_attention_utils.h +++ b/paddle/fluid/operators/fused/fused_attention_utils.h @@ -62,7 +62,7 @@ static void AllReduce(phi::DenseTensor &tensor, // NOLINT // Use New Communication Library PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -73,7 +73,7 @@ static void AllReduce(phi::DenseTensor &tensor, // NOLINT comm_context_manager.Get(std::to_string(ring_id))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - paddle::platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_activation_op.cc index 2ea40d840d2b3..69869cd3b7729 100644 --- a/paddle/fluid/operators/fused/fused_bn_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_bn_activation_op.cc @@ -27,57 +27,57 @@ namespace operators { void FusedBatchNormActOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of BatchNormOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasInput("Scale"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Scale) of BatchNormOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasInput("Bias"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Bias) of BatchNormOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasInput("Mean"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Mean) of BatchNormOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasInput("Variance"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Variance) of BatchNormOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("Y"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Y) of BatchNormOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("MeanOut"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(MeanOut) of BatchNormOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("VarianceOut"), true, - platform::errors::InvalidArgument( 
+ phi::errors::InvalidArgument( "Output(VarianceOut) of BatchNormOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("SavedMean"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(SavedMean) of BatchNormOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("SavedVariance"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(SavedVariance) of BatchNormOp should not be null.")); // make sure Mean/MeanOut and Variance/VarianceOut share memory in Python PADDLE_ENFORCE_EQ(ctx->Inputs("Mean")[0], ctx->Outputs("MeanOut")[0], - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Mean and MeanOut should share the same memory")); PADDLE_ENFORCE_EQ( ctx->Inputs("Variance")[0], ctx->Outputs("VarianceOut")[0], - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Variance and VarianceOut should share the same memory")); const auto x_dims = ctx->GetInputDim("X"); @@ -85,23 +85,23 @@ void FusedBatchNormActOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_GE( x_dims.size(), 2, - platform::errors::PreconditionNotMet("ShapeError: the dimension of input " - "X must greater than or equal to 2." - "But received: the shape of input X " - "= [%s], the dimension of input X =" - "[%d]", - x_dims, - x_dims.size())); + phi::errors::PreconditionNotMet("ShapeError: the dimension of input " + "X must greater than or equal to 2." + "But received: the shape of input X " + "= [%s], the dimension of input X =" + "[%d]", + x_dims, + x_dims.size())); PADDLE_ENFORCE_LE( x_dims.size(), 5, - platform::errors::PreconditionNotMet("ShapeError: the dimension of input " - "X must smaller than or equal to 5." - "But received: the shape of input X " - "= [%s], the dimension of input X =" - "[%d]", - x_dims, - x_dims.size())); + phi::errors::PreconditionNotMet("ShapeError: the dimension of input " + "X must smaller than or equal to 5." 
+ "But received: the shape of input X " + "= [%s], the dimension of input X =" + "[%d]", + x_dims, + x_dims.size())); const int64_t C = x_dims[x_dims.size() - 1]; @@ -111,7 +111,7 @@ void FusedBatchNormActOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_EQ( scale_dim.size(), 1UL, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "ShapeError: the dimension of scale must equal to 1." "But received: the shape of scale is [%s], the dimension " "of scale is [%d]", @@ -119,7 +119,7 @@ void FusedBatchNormActOp::InferShape(framework::InferShapeContext *ctx) const { scale_dim.size())); PADDLE_ENFORCE_EQ(bias_dim.size(), 1UL, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "ShapeError: the dimension of bias must equal to 1." "But received: the shape of bias is [%s],the dimension " "of bias is [%d]", @@ -135,14 +135,14 @@ void FusedBatchNormActOp::InferShape(framework::InferShapeContext *ctx) const { if (check) { PADDLE_ENFORCE_EQ(scale_dim[0], C, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "ShapeError: the shape of scale must equal to [%d]" "But received: the shape of scale is [%d]", C, scale_dim[0])); PADDLE_ENFORCE_EQ(bias_dim[0], C, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "ShapeError: the shape of bias must equal to [%d]" "But received: the shape of bias is [%d]", C, @@ -166,25 +166,25 @@ phi::KernelKey FusedBatchNormActOp::GetExpectedKernelType( if (input_data_type == framework::proto::VarType::FP64) { bn_param_type = framework::proto::VarType::FP64; } - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Scale")->dtype()), - platform::errors::PreconditionNotMet( - "Scale input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Bias")->dtype()), - platform::errors::PreconditionNotMet( - "Bias input should be of float type")); - 
PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Mean")->dtype()), - platform::errors::PreconditionNotMet( - "Mean input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Scale")->dtype()), + phi::errors::PreconditionNotMet("Scale input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Bias")->dtype()), + phi::errors::PreconditionNotMet("Bias input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Mean")->dtype()), + phi::errors::PreconditionNotMet("Mean input should be of float type")); PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( ctx.Input("Variance")->dtype()), - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Variance input should be of float type")); return phi::KernelKey(input_data_type, ctx.GetPlace()); @@ -197,7 +197,7 @@ void FusedBatchNormActOpMaker::Make() { .AddCustomChecker([](const float &epsilon) { PADDLE_ENFORCE_EQ(epsilon >= 0.0f && epsilon <= 0.001f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Attr(epsilon) should be between 0.0 and 0.001, " "but received value is %f.", epsilon)); @@ -252,37 +252,37 @@ void FusedBatchNormActGradOp::InferShape( PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::InvalidArgument("Input(X) should not be null.")); + phi::errors::InvalidArgument("Input(X) should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("Scale"), true, - platform::errors::InvalidArgument("Input(Scale) should not be null.")); + phi::errors::InvalidArgument("Input(Scale) should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput(framework::GradVarName("Y")), true, - platform::errors::InvalidArgument("Input(Y@GRAD) should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("SavedMean"), - true, - platform::errors::InvalidArgument( - 
"Input(SavedMean) should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("SavedVariance"), - true, - platform::errors::InvalidArgument( - "Input(SavedVariance) should not be null")); + phi::errors::InvalidArgument("Input(Y@GRAD) should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("SavedMean"), + true, + phi::errors::InvalidArgument("Input(SavedMean) should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("SavedVariance"), + true, + phi::errors::InvalidArgument("Input(SavedVariance) should not be null")); // check output PADDLE_ENFORCE_EQ( ctx->HasOutput(framework::GradVarName("X")), true, - platform::errors::InvalidArgument("Output(X@GRAD) should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("Scale")), - true, - platform::errors::InvalidArgument( - "Output(Scale@GRAD) should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("Bias")), - true, - platform::errors::InvalidArgument( - "Output(Bias@GRAD) should not be null.")); + phi::errors::InvalidArgument("Output(X@GRAD) should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasOutput(framework::GradVarName("Scale")), + true, + phi::errors::InvalidArgument("Output(Scale@GRAD) should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasOutput(framework::GradVarName("Bias")), + true, + phi::errors::InvalidArgument("Output(Bias@GRAD) should not be null.")); const auto x_dims = ctx->GetInputDim("X"); const int C = x_dims[x_dims.size() - 1]; @@ -297,8 +297,8 @@ phi::KernelKey FusedBatchNormActGradOp::GetExpectedKernelType( const framework::ExecutionContext &ctx) const { const auto *var = ctx.InputVar(framework::GradVarName("Y")); if (var == nullptr) { - PADDLE_THROW(platform::errors::NotFound( - "Can not find Y@GRAD in the execution context.")); + PADDLE_THROW( + phi::errors::NotFound("Can not find Y@GRAD in the execution context.")); } const phi::DenseTensor *t = nullptr; if (var->IsType()) { @@ -306,7 +306,7 @@ phi::KernelKey 
FusedBatchNormActGradOp::GetExpectedKernelType( } if (t == nullptr) { PADDLE_THROW( - platform::errors::NotFound("Can not get the tensor value of Y@GRAD.")); + phi::errors::NotFound("Can not get the tensor value of Y@GRAD.")); } return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc index ac198e9cf2c25..ff903ee6ca716 100644 --- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc @@ -51,35 +51,35 @@ void FusedBatchNormAddActOp::InferShape( const auto x_dims = ctx->GetInputDim("X"); const auto z_dims = ctx->GetInputDim("Z"); - PADDLE_ENFORCE_EQ(x_dims, - z_dims, - platform::errors::InvalidArgument( - "ShapeError: the shapes of input " - "must be equal. But received: the shape " - "of input X = [%s], and the shape of " - "input Y = [%s]", - x_dims, - z_dims)); + PADDLE_ENFORCE_EQ( + x_dims, + z_dims, + phi::errors::InvalidArgument("ShapeError: the shapes of input " + "must be equal. But received: the shape " + "of input X = [%s], and the shape of " + "input Y = [%s]", + x_dims, + z_dims)); PADDLE_ENFORCE_GE( x_dims.size(), 2, - platform::errors::InvalidArgument("ShapeError: the dimensions of input " - "must greater than or equal to 2." - "But received: the shape of input " - "= [%s], the dimension of input = " - "[%d]", - x_dims, - x_dims.size())); + phi::errors::InvalidArgument("ShapeError: the dimensions of input " + "must greater than or equal to 2." + "But received: the shape of input " + "= [%s], the dimension of input = " + "[%d]", + x_dims, + x_dims.size())); PADDLE_ENFORCE_LE( x_dims.size(), 5, - platform::errors::InvalidArgument("ShapeError: the dimensions of input " - "must smaller than or equal to 5." 
- "But received: the shape of input " - "= [%s], the dimension of input = " - "[%d]", - x_dims, - x_dims.size())); + phi::errors::InvalidArgument("ShapeError: the dimensions of input " + "must smaller than or equal to 5." + "But received: the shape of input " + "= [%s], the dimension of input = " + "[%d]", + x_dims, + x_dims.size())); const int64_t C = x_dims[x_dims.size() - 1]; @@ -89,7 +89,7 @@ void FusedBatchNormAddActOp::InferShape( PADDLE_ENFORCE_EQ( scale_dim.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the dimension of scale must equal to 1." "But received: the shape of scale is [%s], the dimension " "of scale is [%d]", @@ -97,7 +97,7 @@ void FusedBatchNormAddActOp::InferShape( scale_dim.size())); PADDLE_ENFORCE_EQ(bias_dim.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the dimension of bias must equal to 1." "But received: the shape of bias is [%s],the dimension " "of bias is [%d]", @@ -113,14 +113,14 @@ void FusedBatchNormAddActOp::InferShape( if (check) { PADDLE_ENFORCE_EQ(scale_dim[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the shape of scale must equal to [%d]" "But received: the shape of scale is [%d]", C, scale_dim[0])); PADDLE_ENFORCE_EQ(bias_dim[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the shape of bias must equal to [%d]" "But received: the shape of bias is [%d]", C, @@ -145,12 +145,12 @@ phi::KernelKey FusedBatchNormAddActOp::GetExpectedKernelType( bn_param_type, framework::TransToProtoVarType( ctx.Input("Scale")->dtype()), - platform::errors::InvalidArgument("Scale input should be of float type")); + phi::errors::InvalidArgument("Scale input should be of float type")); PADDLE_ENFORCE_EQ( bn_param_type, framework::TransToProtoVarType( ctx.Input("Bias")->dtype()), - platform::errors::InvalidArgument("Bias input should be of float type")); + 
phi::errors::InvalidArgument("Bias input should be of float type")); return phi::KernelKey(input_data_type, ctx.GetPlace()); } @@ -194,7 +194,7 @@ void FusedBatchNormAddActOpMaker::Make() { .AddCustomChecker([](const float &epsilon) { PADDLE_ENFORCE_EQ(epsilon >= 0.0f && epsilon <= 0.001f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'epsilon' should be between 0.0 and 0.001.")); }); AddAttr("act_type", "The activation type to be fused.") @@ -261,8 +261,8 @@ phi::KernelKey FusedBatchNormAddActGradOp::GetExpectedKernelType( const framework::ExecutionContext &ctx) const { const auto *var = ctx.InputVar(framework::GradVarName("Y")); if (var == nullptr) { - PADDLE_THROW(platform::errors::NotFound( - "Can not find Y@GRAD in the execution context.")); + PADDLE_THROW( + phi::errors::NotFound("Can not find Y@GRAD in the execution context.")); } const phi::DenseTensor *t = nullptr; if (var->IsType()) { @@ -270,7 +270,7 @@ phi::KernelKey FusedBatchNormAddActGradOp::GetExpectedKernelType( } if (t == nullptr) { PADDLE_THROW( - platform::errors::NotFound("Can not get the tensor value of Y@GRAD.")); + phi::errors::NotFound("Can not get the tensor value of Y@GRAD.")); } return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), diff --git a/paddle/fluid/operators/fused/fused_dropout_common.h b/paddle/fluid/operators/fused/fused_dropout_common.h index ccd099109487c..737909be4d8bf 100644 --- a/paddle/fluid/operators/fused/fused_dropout_common.h +++ b/paddle/fluid/operators/fused/fused_dropout_common.h @@ -22,9 +22,9 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/fused/quant_dequant_kernel.h" #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/float16.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" #include "paddle/phi/common/amp_type_traits.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/kernels/funcs/aligned_vector.h" #include "paddle/phi/kernels/funcs/functors.h" #include "paddle/phi/kernels/funcs/layer_norm_impl.cu.h" diff --git a/paddle/fluid/operators/fused/fused_dropout_helper.h b/paddle/fluid/operators/fused/fused_dropout_helper.h index 2b1f6b14c33e5..9e9a89015652b 100644 --- a/paddle/fluid/operators/fused/fused_dropout_helper.h +++ b/paddle/fluid/operators/fused/fused_dropout_helper.h @@ -288,7 +288,7 @@ class FusedDropoutHelper { quant_max_bound, quant_min_bound); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Currently only supports gelu or relu activation functions!")); } } @@ -332,7 +332,7 @@ class FusedDropoutHelper { d_bias, ctx); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Currently only supports gelu or relu activation functions!")); } } diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc b/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc index b11840866d46b..b17a6827af0e9 100644 --- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc @@ -24,7 +24,7 @@ bool IsUnaryCompound(const std::vector &functor_list) { PADDLE_ENFORCE_EQ( functor_list.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Invalid functor list size %d, which should be equal to %d.", functor_list.size(), 2)); @@ -39,7 +39,7 @@ bool HasInPlaceUnary(const std::vector &functor_list) { PADDLE_ENFORCE_EQ( functor_list.size(), 2, - 
platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Invalid functor list size %d, which should be equal to %d.", functor_list.size(), 2)); @@ -55,7 +55,7 @@ bool InputXCanBeAbsent(const std::vector &functor_list) { PADDLE_ENFORCE_EQ( functor_list.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Invalid functor list size %d, which should be equal to %d.", functor_list.size(), 2)); @@ -73,7 +73,7 @@ static bool IsSupportedCompound(const std::vector &functors) { PADDLE_ENFORCE_EQ( functors.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Invalid functor list size %d, which should be equal to %d.", functors.size(), 2)); @@ -89,12 +89,12 @@ static bool IsSupportedCompound(const std::vector &functors) { } else if (binary_fun.count(functors[1])) { unary_fun_str = functors[0]; } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "%s and %s are not included in fused_list.", functors[0], functors[1])); } PADDLE_ENFORCE_EQ(unary_fun.count(unary_fun_str), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "%s is not included in fused_list.", unary_fun_str)); return true; } @@ -107,17 +107,17 @@ class FusedElemwiseActivationOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of FusedElemwiseActivationOp op should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("Y"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Y) of FusedElemwiseActivationOp op should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of FusedElemwiseActivationOp op should not be null.")); auto x_dim = ctx->GetInputDim("X"); @@ -134,7 +134,7 @@ class FusedElemwiseActivationOp : public framework::OperatorWithKernel 
{ PADDLE_ENFORCE_EQ( ctx->HasOutput("IntermediateOut"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(IntermediateOut) of FusedElemwiseActivationOp " "should not be null.")); @@ -176,7 +176,7 @@ class FusedElemwiseActivationOp : public framework::OperatorWithKernel { const framework::ExecutionContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.Input("X")->dtype(), ctx.Input("Y")->dtype(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The element's type of input should be the same.")); return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); @@ -214,7 +214,7 @@ class FusedElemwiseActivationMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( IsSupportedCompound(functor_list), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "the input functors should support compounding.")); }); @@ -317,10 +317,10 @@ class FusedElemwiseActivationOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), - true, - platform::errors::InvalidArgument( - "Input(Out@Grad) should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput(framework::GradVarName("Out")), + true, + phi::errors::InvalidArgument("Input(Out@Grad) should not be null.")); auto functor_list = ctx->Attrs().Get>("functor_list"); @@ -328,14 +328,14 @@ class FusedElemwiseActivationOpGrad : public framework::OperatorWithKernel { if (ctx->Attrs().Get("save_intermediate_out")) { PADDLE_ENFORCE_EQ(ctx->HasInput("IntermediateOut"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(IntermediateOut) should not be null.")); } else { if (!InputXCanBeAbsent(functor_list)) { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::InvalidArgument("Input(X) should not be null.")); + 
phi::errors::InvalidArgument("Input(X) should not be null.")); } } @@ -353,7 +353,7 @@ class FusedElemwiseActivationOpGrad : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( InputXCanBeAbsent(functor_list), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only when BinaryFunctor is elementwise_add, the 'X' " "could be absent.")); @@ -370,7 +370,7 @@ class FusedElemwiseActivationOpGrad : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("Y"), true, - platform::errors::InvalidArgument("Input(Y) should not be null.")); + phi::errors::InvalidArgument("Input(Y) should not be null.")); ctx->SetOutputDim(y_grad_name, ctx->GetInputDim("Y")); ctx->ShareLoD("Y", y_grad_name); } @@ -414,7 +414,7 @@ class FusedElemwiseAddActivationOp : public FusedElemwiseActivationOp { PADDLE_ENFORCE_EQ( elemntwise_add_detected, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "When the FusedElemwiseAddActivationOp Is used in fused pass, the " "elementwise_add Op must be" "detected and used, Please check the fuse pass pattern")); @@ -439,7 +439,7 @@ class FusedElemwiseAddActivationOpGrad : public FusedElemwiseActivationOpGrad { PADDLE_ENFORCE_EQ( elemntwise_add_grad_detected, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "When the FusedElemwiseAddActivationOpGrad Is used in fused pass, " "the elementwise_add_grad Op must be" "detected and used, Please check the fuse pass pattern")); diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cu b/paddle/fluid/operators/fused/fused_elemwise_activation_op.cu index e7c436dd1fa0c..e712b78c42669 100644 --- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cu +++ b/paddle/fluid/operators/fused/fused_elemwise_activation_op.cu @@ -23,14 +23,14 @@ PD_REGISTER_STRUCT_KERNEL(fused_elemwise_activation, ops::FusedElemwiseActivationKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} 
PD_REGISTER_STRUCT_KERNEL(fused_elemwise_activation_grad, GPU, ALL_LAYOUT, ops::FusedElemwiseActivationGradKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} PD_REGISTER_STRUCT_KERNEL(fused_elemwise_add_activation, GPU, @@ -38,11 +38,11 @@ PD_REGISTER_STRUCT_KERNEL(fused_elemwise_add_activation, ops::FusedElemwiseAddActivationKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} PD_REGISTER_STRUCT_KERNEL(fused_elemwise_add_activation_grad, GPU, ALL_LAYOUT, ops::FusedElemwiseAddActivationGradKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h b/paddle/fluid/operators/fused/fused_elemwise_activation_op.h index ad7f79307e628..6c476afd340fa 100644 --- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h +++ b/paddle/fluid/operators/fused/fused_elemwise_activation_op.h @@ -415,8 +415,8 @@ static void RunFunctors(const framework::ExecutionContext &ctx, in_y, outputs); } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s has not been implemented.", funcs_str)); + PADDLE_THROW(phi::errors::InvalidArgument("%s has not been implemented.", + funcs_str)); } } @@ -611,8 +611,8 @@ static void RunGradFunctors(const framework::ExecutionContext &ctx, y_grad, d_intermediate_out); } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s has not been implemented.", funcs_str)); + PADDLE_THROW(phi::errors::InvalidArgument("%s has not been implemented.", + funcs_str)); } } @@ -629,10 +629,10 @@ class FusedElemwiseActivationKernel : public framework::OpKernel { "Y", "FusedElemwiseActivation"); - PADDLE_ENFORCE_EQ(ctx.HasOutput("Out"), - true, - platform::errors::InvalidArgument( - "The output(Out) should not be empty")); + PADDLE_ENFORCE_EQ( + ctx.HasOutput("Out"), + true, + phi::errors::InvalidArgument("The output(Out) should not be empty")); auto output = ctx.Output("Out"); std::vector outputs; @@ -641,7 +641,7 @@ class 
FusedElemwiseActivationKernel : public framework::OpKernel { if (ctx.Attr("save_intermediate_out")) { PADDLE_ENFORCE_EQ(ctx.HasOutput("IntermediateOut"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The save_intermediate_out is enable, so the " "IntermediateOut should not be empty.")); @@ -663,16 +663,16 @@ class FusedElemwiseActivationGradKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( in_y, nullptr, - platform::errors::InvalidArgument("Input(Y) should not be nullptr.")); + phi::errors::InvalidArgument("Input(Y) should not be nullptr.")); phi::DenseTensor *in_out = const_cast(ctx.Input("Out")); auto in_out_grad = ctx.Input(framework::GradVarName("Out")); - PADDLE_ENFORCE_NE(in_out_grad, - nullptr, - platform::errors::InvalidArgument( - "Input(Out@Grad) should not be nullptr.")); + PADDLE_ENFORCE_NE( + in_out_grad, + nullptr, + phi::errors::InvalidArgument("Input(Out@Grad) should not be nullptr.")); phi::DenseTensor *in_x = const_cast(ctx.Input("X")); @@ -695,7 +695,7 @@ class FusedElemwiseActivationGradKernel : public framework::OpKernel { ctx.Input("IntermediateOut")); PADDLE_ENFORCE_NE(in_intermediate_out, nullptr, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The option of 'save_intermediate_out' is opened," " so the number of 'Out' should be two.")); } else { @@ -703,7 +703,7 @@ class FusedElemwiseActivationGradKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( in_x, nullptr, - platform::errors::InvalidArgument("Input(X) should not be null.")); + phi::errors::InvalidArgument("Input(X) should not be null.")); } } @@ -712,13 +712,13 @@ class FusedElemwiseActivationGradKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( in_x, nullptr, - platform::errors::InvalidArgument("Input(X) should not be null.")); + phi::errors::InvalidArgument("Input(X) should not be null.")); } else { // If functor_list contains elementwise_add, the backward doesn't use // in_x, in_y and in_out. 
PADDLE_ENFORCE_EQ(InputXCanBeAbsent(functor_list), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only when the compoundfunctor contains " "elementwise_add_grad, the 'X' could be absent.")); in_x = const_cast(in_out_grad); @@ -729,13 +729,13 @@ class FusedElemwiseActivationGradKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( in_out, nullptr, - platform::errors::InvalidArgument("Input(X) should not be null.")); + phi::errors::InvalidArgument("Input(X) should not be null.")); } else { // If functor_list contains elementwise_add, the backward doesn't use // in_x, in_y and in_out. PADDLE_ENFORCE_EQ(InputXCanBeAbsent(functor_list), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only when the compoundfunctor contains " "elementwise_add_grad, the 'X' could be absent.")); in_out = const_cast(in_out_grad); diff --git a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc index e69825fdd9076..e4c43e4e4efb2 100644 --- a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc +++ b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc @@ -50,12 +50,12 @@ void FusedEmbeddingFCLSTMOp::InferShape( PADDLE_ENFORCE_EQ( table_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Embeddings's rank should be 2, but received value is:%d.", table_dims.size())); PADDLE_ENFORCE_EQ(ids_dims[ids_rank - 1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last dimension of the 'Ids' tensor must be 1, but " "received value is:%d.", ids_dims[ids_rank - 1])); @@ -64,14 +64,14 @@ void FusedEmbeddingFCLSTMOp::InferShape( PADDLE_ENFORCE_EQ( x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Ids)'s rank must be 2, but received value is:%d.", x_dims.size())); if (ctx->HasInput("H0")) { PADDLE_ENFORCE_EQ(ctx->HasInput("C0"), true, - 
platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Cell) and Input(Hidden) of LSTM should exist " "at the same time.")); auto h_dims = ctx->GetInputDim("H0"); @@ -79,7 +79,7 @@ void FusedEmbeddingFCLSTMOp::InferShape( PADDLE_ENFORCE_EQ( h_dims, c_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension of Input(H0) and Input(C0) " "should be the same, but received H0 dim is:[%s], C0 dim is[%s]", h_dims, @@ -91,19 +91,19 @@ void FusedEmbeddingFCLSTMOp::InferShape( PADDLE_ENFORCE_EQ( wh_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(WeightH) should be 2, but received value is:%d.", wh_dims.size())); PADDLE_ENFORCE_EQ(wh_dims[0], frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(WeightH) should equal to " "frame size:%d, but received value is:%d.", frame_size, wh_dims[0])); PADDLE_ENFORCE_EQ(wh_dims[1], 4 * frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(WeightH) should equal " "to 4 * %d, but received value is:%d.", frame_size, @@ -113,19 +113,19 @@ void FusedEmbeddingFCLSTMOp::InferShape( PADDLE_ENFORCE_EQ( b_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(Bias) should be 2, but received value is:%d.", b_dims.size())); - PADDLE_ENFORCE_EQ(b_dims[0], - 1, - platform::errors::InvalidArgument( - "The first dimension of Input(Bias) " - "should be 1, but received value is:%d.", - b_dims[0])); + PADDLE_ENFORCE_EQ( + b_dims[0], + 1, + phi::errors::InvalidArgument("The first dimension of Input(Bias) " + "should be 1, but received value is:%d.", + b_dims[0])); PADDLE_ENFORCE_EQ( b_dims[1], (ctx->Attrs().Get("use_peepholes") ? 
7 : 4) * frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Bias) should be " "7 * %d if enable peepholes connection or" "4 * %d if disable peepholes, bias dim is:%d, use_peepholes:%d", @@ -417,11 +417,11 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel { PADDLE_ENFORCE_LT( ids_data[i], row_number, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "Value of Ids %d should less than dict size %d.", i, row_number)); PADDLE_ENFORCE_GE(ids_data[i], 0, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "Value of Ids %d should greater than ZERO.", i)); memcpy(xx_data + i * row_width, embeddings_data + ids_data[i] * row_width, @@ -530,11 +530,11 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel { PADDLE_ENFORCE_LT( ids_data[i], row_number, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "Value of Ids %d should less than dict size %d.", i, row_number)); PADDLE_ENFORCE_GE(ids_data[i], 0, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "Value of Ids %d should greater than ZERO.", i)); memcpy(xx_data + i * row_width, embeddings_data + ids_data[i] * row_width, diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc index a0ee64bd2eced..4a7691bd33844 100644 --- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc +++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc @@ -37,21 +37,21 @@ class FusedEmbeddingSeqPoolOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(table_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dim size of the input tensor 'W' should be 2. " "But received W's size = %d.", table_dims.size())); PADDLE_ENFORCE_EQ( ids_dims[ids_dims.size() - 1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last dimension of the input tensor 'Ids' should be 1. 
" "But received Ids's size in the last dimension = %d.", ids_dims[ids_dims.size() - 1])); // we only support sum now PADDLE_ENFORCE_EQ(combiner, "sum", - platform::errors::Unimplemented( + phi::errors::Unimplemented( "The pooling type of sequence_pool only support sum " "now. So the 'combiner' must be 'sum'.")); @@ -61,7 +61,7 @@ class FusedEmbeddingSeqPoolOp : public framework::OperatorWithKernel { PADDLE_GET(framework::VarDesc*, ctx->GetInputVarPtrs("Ids")[0]); PADDLE_ENFORCE_EQ(ids_desc->GetLoDLevel(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "In compile time, the LoD Level of Ids should be 1. " "But received the LoD Level of Ids = %d.", ids_desc->GetLoDLevel())); diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h index e0186d99acb03..2a9a1e71dbd2b 100644 --- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h +++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h @@ -95,7 +95,7 @@ struct EmbeddingVSumFunctor { PADDLE_ENFORCE_LE(table_width * idx_width, out_width, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "table_width * idx_width should be less than or " "equal to out_width. But received " "table_width * idx_width = %s, out_width = %d.", @@ -103,7 +103,7 @@ struct EmbeddingVSumFunctor { out_width)); PADDLE_ENFORCE_GT(ids_lod.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The tensor ids's LoD[0] should be greater than 1. " "But received the ids's LoD[0] = %d.", ids_lod.size())); @@ -152,7 +152,7 @@ class FusedEmbeddingSeqPoolKernel : public framework::OpKernel { // in run time, the LoD of ids must be 1 PADDLE_ENFORCE_EQ(ids_lod.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The LoD level of Input(Ids) should be 1. 
But " "received Ids's LoD level = %d.", ids_lod.size())); @@ -236,7 +236,7 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel { auto *table_t = context.Input("W"); table_dim = table_t->value().dims(); } else { - PADDLE_THROW(platform::errors::PermissionDenied( + PADDLE_THROW(phi::errors::PermissionDenied( "The parameter W of a LookupTable " "must be either phi::DenseTensor or SelectedRows.")); } @@ -293,7 +293,7 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel { const auto &ids_lod = ids->lod(); PADDLE_ENFORCE_EQ(ids_lod.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The LoD level of Input(Ids) should be 1. But " "received Ids's LoD level = %d.", ids_lod.size())); diff --git a/paddle/fluid/operators/fused/fused_feedforward_op.cc b/paddle/fluid/operators/fused/fused_feedforward_op.cc index f6343f5bd1cbf..5956ea5a839a7 100644 --- a/paddle/fluid/operators/fused/fused_feedforward_op.cc +++ b/paddle/fluid/operators/fused/fused_feedforward_op.cc @@ -79,8 +79,8 @@ class FusedFeedForwardOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( mat_dim_x.width_, static_cast(1), - platform::errors::InvalidArgument("Product from the X shape[1] to " - "shape[n-1] must be larger than 1!")); + phi::errors::InvalidArgument("Product from the X shape[1] to " + "shape[n-1] must be larger than 1!")); auto dim_Linear1Weight = context->GetInputDim("Linear1Weight"); auto tmp_dim_x = dim_x; tmp_dim_x[dim_x.size() - 1] = @@ -190,7 +190,7 @@ class FusedFeedForwardOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( drop_p >= 0.0f && drop_p <= 1.0f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'dropout1_rate' must be between 0.0 and 1.0.")); }); AddAttr("dropout2_rate", "the dropout rate of second dropout") @@ -199,7 +199,7 @@ class FusedFeedForwardOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( drop_p >= 0.0f && drop_p <= 1.0f, true, - 
platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'dropout2_rate' must be between 0.0 and 1.0.")); }); AddAttr("dropout1_implementation", @@ -209,7 +209,7 @@ class FusedFeedForwardOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( type == "downgrade_in_infer" || type == "upscale_in_train", true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "dropout1_implementation can only be downgrade_in_infer or " "upscale_in_train")); }); @@ -220,7 +220,7 @@ class FusedFeedForwardOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( type == "downgrade_in_infer" || type == "upscale_in_train", true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "dropout2_implementation can only be downgrade_in_infer or " "upscale_in_train")); }); @@ -266,7 +266,7 @@ class FusedFeedForwardOpGrad : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE_EQ(ctx->Attrs().Get("is_test"), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GradOp is only callable when is_test is false")); bool pre_layer_norm = ctx->Attrs().Get("pre_layer_norm"); OP_INOUT_CHECK(ctx->HasInput("Dropout1Mask"), diff --git a/paddle/fluid/operators/fused/fused_gate_attention.h b/paddle/fluid/operators/fused/fused_gate_attention.h index 69fbca0f9be0f..cc1d0de18ada1 100644 --- a/paddle/fluid/operators/fused/fused_gate_attention.h +++ b/paddle/fluid/operators/fused/fused_gate_attention.h @@ -156,8 +156,8 @@ struct GateAttentionConfig { if (merge_qkv) { PADDLE_ENFORCE_NOT_NULL( qkv_weight, - platform::errors::NotFound("The input qkv_weight can not be nullptr " - "when merge_qkv is true.")); + phi::errors::NotFound("The input qkv_weight can not be nullptr " + "when merge_qkv is true.")); // When q_dim == kv_dim, QKV matmul can be computed merged. 
// qkv_weight: shape=[3, num_heads, head_dim, q_dim] @@ -172,12 +172,12 @@ struct GateAttentionConfig { } else { PADDLE_ENFORCE_NOT_NULL( key, - platform::errors::NotFound( + phi::errors::NotFound( "The input key can not be nullptr when merge_qkv is false.")); PADDLE_ENFORCE_NOT_NULL( query_weight, - platform::errors::NotFound("The input query_weight can not be " - "nullptr when merge_qkv is false.")); + phi::errors::NotFound("The input query_weight can not be " + "nullptr when merge_qkv is false.")); // When q_dim != kv_dim, QKV matmul must be computed saparately. // key: shape=[batch_size, seq_len_m, m_size, kv_dim] @@ -414,8 +414,8 @@ class FMHAGateRef { // qkv_transpose_out = transpose(qkv_out) PADDLE_ENFORCE_NOT_NULL( qkv_transpose_out, - platform::errors::NotFound("The input qkv_transpose_out can not be " - "nullptr when merge_qkv is true.")); + phi::errors::NotFound("The input qkv_transpose_out can not be " + "nullptr when merge_qkv is true.")); phi::DenseTensor* qkv_out = config->GetQKVOut(); ComputeQKVTransposeForward(*qkv_out, qkv_transpose_out); @@ -429,16 +429,16 @@ class FMHAGateRef { } else { PADDLE_ENFORCE_NOT_NULL( q_transpose_out, - platform::errors::NotFound("The input q_transpose_out can not be " - "nullptr when merge_qkv is false.")); + phi::errors::NotFound("The input q_transpose_out can not be " + "nullptr when merge_qkv is false.")); PADDLE_ENFORCE_NOT_NULL( k_transpose_out, - platform::errors::NotFound("The input k_transpose_out can not be " - "nullptr when merge_qkv is false.")); + phi::errors::NotFound("The input k_transpose_out can not be " + "nullptr when merge_qkv is false.")); PADDLE_ENFORCE_NOT_NULL( v_transpose_out, - platform::errors::NotFound("The input v_transpose_out can not be " - "nullptr when merge_qkv is false.")); + phi::errors::NotFound("The input v_transpose_out can not be " + "nullptr when merge_qkv is false.")); phi::DenseTensor* query_out = config->GetQueryOut(); phi::DenseTensor* key_out = config->GetKeyOut(); @@ 
-544,8 +544,8 @@ class FMHAGateRef { if (merge_qkv_) { PADDLE_ENFORCE_NOT_NULL( qkv_transpose_out, - platform::errors::NotFound("The input qkv_transpose_out can not be " - "nullptr when merge_qkv is true.")); + phi::errors::NotFound("The input qkv_transpose_out can not be " + "nullptr when merge_qkv is true.")); int64_t q_size = config->GetQuerySize(); q_ptr = qkv_transpose_out->data(); @@ -562,16 +562,16 @@ class FMHAGateRef { } else { PADDLE_ENFORCE_NOT_NULL( q_transpose_out, - platform::errors::NotFound("The input q_transpose_out can not be " - "nullptr when merge_qkv is false.")); + phi::errors::NotFound("The input q_transpose_out can not be " + "nullptr when merge_qkv is false.")); PADDLE_ENFORCE_NOT_NULL( k_transpose_out, - platform::errors::NotFound("The input k_transpose_out can not be " - "nullptr when merge_qkv is false.")); + phi::errors::NotFound("The input k_transpose_out can not be " + "nullptr when merge_qkv is false.")); PADDLE_ENFORCE_NOT_NULL( v_transpose_out, - platform::errors::NotFound("The input v_transpose_out can not be " - "nullptr when merge_qkv is false.")); + phi::errors::NotFound("The input v_transpose_out can not be " + "nullptr when merge_qkv is false.")); q_ptr = q_transpose_out->data(); k_ptr = k_transpose_out->data(); @@ -787,11 +787,11 @@ class FMHAGateRef { phi::DenseTensor* nonbatched_bias_grad) { PADDLE_ENFORCE_NOT_NULL( qk_out_grad, - platform::errors::NotFound("The qk_out_grad can not be nullptr.")); + phi::errors::NotFound("The qk_out_grad can not be nullptr.")); PADDLE_ENFORCE_EQ(qk_out_grad->dims(), softmax_out->dims(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of qk_out_grad and softmax_out is " "expected to be the same. 
But received qk_out_grad's " "shape = %s, softmax_out's shape = %s.", @@ -800,7 +800,7 @@ class FMHAGateRef { PADDLE_ENFORCE_EQ(src_mask_grad, nullptr, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "src_mask_grad is expected to be nullptr.")); phi::SoftmaxBackwardCUDAKernelDriver( @@ -874,8 +874,8 @@ class FlashAttnWithGating { PADDLE_ENFORCE_NOT_NULL( qkv_transpose_out, - platform::errors::NotFound("The input qkv_transpose_out can not be " - "nullptr when merge_qkv is true.")); + phi::errors::NotFound("The input qkv_transpose_out can not be " + "nullptr when merge_qkv is true.")); // 1. Transpose qkv_out for flash_attn. phi::DenseTensor* qkv_out = config->GetQKVOut(); @@ -989,8 +989,8 @@ class FlashAttnWithGating { PADDLE_ENFORCE_NOT_NULL( qkv_transpose_out, - platform::errors::NotFound("The input qkv_transpose_out can not be" - "nullptr when merge_qkv is true.")); + phi::errors::NotFound("The input qkv_transpose_out can not be" + "nullptr when merge_qkv is true.")); int64_t q_size = config->GetQuerySize(); const T* q_ptr = qkv_transpose_out->data(); diff --git a/paddle/fluid/operators/fused/fused_gate_attention_op.cu b/paddle/fluid/operators/fused/fused_gate_attention_op.cu index d066086bd6ae0..78202f70bcffb 100644 --- a/paddle/fluid/operators/fused/fused_gate_attention_op.cu +++ b/paddle/fluid/operators/fused/fused_gate_attention_op.cu @@ -401,7 +401,7 @@ class FusedGateAttentionOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( !key || query == key || query->data() == key->data(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "key is expected to be nullptr or the same as " "query, but received key=%p, query=%p.", key, @@ -623,14 +623,14 @@ PD_REGISTER_STRUCT_KERNEL(fused_gate_attention, ALL_LAYOUT, ops::FusedGateAttentionOpKernel, float, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} PD_REGISTER_STRUCT_KERNEL(fused_gate_attention_grad, GPU, ALL_LAYOUT, ops::FusedGateAttentionGradKernel, 
float, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} #else PD_REGISTER_STRUCT_KERNEL(fused_gate_attention, @@ -639,7 +639,7 @@ PD_REGISTER_STRUCT_KERNEL(fused_gate_attention, ops::FusedGateAttentionOpKernel, float, double, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} PD_REGISTER_STRUCT_KERNEL(fused_gate_attention_grad, GPU, @@ -647,6 +647,6 @@ PD_REGISTER_STRUCT_KERNEL(fused_gate_attention_grad, ops::FusedGateAttentionGradKernel, float, double, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} #endif diff --git a/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h b/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h index a6bd467dc1992..157ab69afc943 100644 --- a/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h +++ b/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h @@ -466,12 +466,11 @@ struct FusedLayernormResidualDropoutBiasFunctor { } }; -template struct FusedLayernormResidualDropoutBiasFunctor< - paddle::platform::float16, - uint8_t, - 8, - float, - false>; +template struct FusedLayernormResidualDropoutBiasFunctor; /* * @brief layernorm(residual + dropout(x)); @@ -872,7 +871,7 @@ void LaunchLayernormResidualDropoutBias( epsilon, cols)); default: - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Product from begin_norm_axis to end must be larger than 1")); break; } @@ -1037,7 +1036,7 @@ void LaunchLayernormResidualDropoutBias( switch (cols) { LAUNCH_FUSED_FAST_LN_KERNEL; default: - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Only when column is equal to 768/1024/4096 is supported for " "now")); break; diff --git a/paddle/fluid/operators/fused/fused_matmul_op.cc b/paddle/fluid/operators/fused/fused_matmul_op.cc index 129f7e85386e7..93d79d677f8a5 100644 --- a/paddle/fluid/operators/fused/fused_matmul_op.cc +++ b/paddle/fluid/operators/fused/fused_matmul_op.cc 
@@ -176,47 +176,47 @@ class FusedMatmulOpMaker : public framework::OpProtoAndCheckerMaker { protected: void Apply() { AddInput("ResidualData", - "Extra input from matmul_elementwise_add_mkldnn_fuse_pass") + "Extra input from matmul_elementwise_add_onednn_fuse_pass") .AsDispensable() .AsExtra(); AddAttr("matmul_alpha", "Output scale used in matmul_v1") .SetDefault(1.0f); AddAttr( "fuse_activation", - "Activation type from matmul_activation_mkldnn_fuse_pass") + "Activation type from matmul_activation_onednn_fuse_pass") .SetDefault(""); AddAttr("fuse_alpha", - "Activation alpha from matmul_activation_mkldnn_fuse_pass") + "Activation alpha from matmul_activation_onednn_fuse_pass") .SetDefault(0.0f); AddAttr("fuse_beta", - "Activation beta from matmul_activation_mkldnn_fuse_pass") + "Activation beta from matmul_activation_onednn_fuse_pass") .SetDefault(0.0f); AddAttr("fused_output_scale", "Output scale from operator_scale_onednn_fuse_pass") .SetDefault(1.0f); AddAttr>("fused_reshape_X", "Reshape's shape attribute from " - "reshape_transpose_matmul_mkldnn_fuse_pass") + "reshape_transpose_matmul_onednn_fuse_pass") .SetDefault({}); AddAttr>("fused_transpose_X", "Transpose's axis attribute from " - "reshape_transpose_matmul_mkldnn_fuse_pass") + "reshape_transpose_matmul_onednn_fuse_pass") .SetDefault({}); AddAttr>("fused_reshape_Y", "Reshape's shape attribute from " - "reshape_transpose_matmul_mkldnn_fuse_pass") + "reshape_transpose_matmul_onednn_fuse_pass") .SetDefault({}); AddAttr>("fused_transpose_Y", "Transpose's axis attribute from " - "reshape_transpose_matmul_mkldnn_fuse_pass") + "reshape_transpose_matmul_onednn_fuse_pass") .SetDefault({}); AddAttr>("fused_reshape_Out", "Reshape's shape attribute from " - "matmul_transpose_reshape_mkldnn_fuse_pass") + "matmul_transpose_reshape_onednn_fuse_pass") .SetDefault({}); AddAttr>("fused_transpose_Out", "Transpose's axis attribute from " - "matmul_transpose_reshape_mkldnn_fuse_pass") + 
"matmul_transpose_reshape_onednn_fuse_pass") .SetDefault({}); AddAttr("mkldnn_data_type", "oneDNN operator data type") .SetDefault("float32") diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc index 562ddf7ae6c4e..dcea415e32508 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc +++ b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc @@ -72,23 +72,23 @@ class FusedMultiTransformerINT8Op : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dim.size(), 3, - platform::errors::InvalidArgument("The dimensions of x must be 3" - "(batch_size, seq_len, dim_embed)," - "but received dimensions of" - "Input is [%d]", - x_dim.size())); - PADDLE_ENFORCE_EQ(y_dim.size(), - 4, - platform::errors::InvalidArgument( - "The dimensions of qkv_weight must be 4" - "(3, num_head, dim_head, dim_embed)," - "but received dimensions of" - "Input is [%d]", - y_dim.size())); + phi::errors::InvalidArgument("The dimensions of x must be 3" + "(batch_size, seq_len, dim_embed)," + "but received dimensions of" + "Input is [%d]", + x_dim.size())); + PADDLE_ENFORCE_EQ( + y_dim.size(), + 4, + phi::errors::InvalidArgument("The dimensions of qkv_weight must be 4" + "(3, num_head, dim_head, dim_embed)," + "but received dimensions of" + "Input is [%d]", + y_dim.size())); PADDLE_ENFORCE_EQ( x_dim[2], trans_qkvw ? y_dim[3] : y_dim[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the dimension of x_dim[2] and y_dim[3](trans_qkvw is " "true) or y_dim[0](trans_qkvw is false)" "must be equal. 
But received: the shape " @@ -101,7 +101,7 @@ class FusedMultiTransformerINT8Op : public framework::OperatorWithKernel { if (trans_qkvw) { PADDLE_ENFORCE_EQ(y_dim[1] * y_dim[2], y_dim[3], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of qkv_weight must be 4" "(3, num_head, dim_head, dim_embed)," "and must satisfy the limitations: " @@ -110,7 +110,7 @@ class FusedMultiTransformerINT8Op : public framework::OperatorWithKernel { } else { PADDLE_ENFORCE_EQ(y_dim[2] * y_dim[3], y_dim[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of qkv_weight must be 4" "(dim_embed, 3, num_head, dim_head)," "and must satisfy the limitations: " @@ -126,23 +126,23 @@ class FusedMultiTransformerINT8Op : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( c_dim.size(), 5, - paddle::platform::errors::InvalidArgument( - "The CacheKV must be 5 dims, but got %d", c_dim.size())); + phi::errors::InvalidArgument("The CacheKV must be 5 dims, but got %d", + c_dim.size())); PADDLE_ENFORCE_EQ(c_dim[0], 2, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dim of CacheKV must be 2, but got %d", c_dim[0])); // 2 PADDLE_ENFORCE_EQ(c_dim[1], x_dim[0], - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dim of CacheKV must be equal with " "batch size %d, but got %d", x_dim[0], c_dim[1])); // batch_size PADDLE_ENFORCE_EQ(c_dim[2], trans_qkvw ? y_dim[1] : y_dim[2], - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The third dim of CacheKV must be equal with num " "head %d, but got %d", trans_qkvw ? 
y_dim[1] : y_dim[2], @@ -150,12 +150,12 @@ class FusedMultiTransformerINT8Op : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( c_dim[3], 0, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The forth dim of CacheKV must be greater than 0, but got %d", c_dim[3])); // cache_seq_len PADDLE_ENFORCE_EQ(c_dim[4], trans_qkvw ? y_dim[2] : y_dim[3], - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The fifth dim of CacheKV must be equal with head " "size %d, but got %d", trans_qkvw ? y_dim[2] : y_dim[3], @@ -273,7 +273,7 @@ class FusedMultiTransformerINT8OpMaker .AddCustomChecker([](const float &epsilon) { PADDLE_ENFORCE_EQ(epsilon >= 0.0f && epsilon <= 0.001f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'epsilon' in Op(LayerNorm) should be between" "0.0 and 0.001, But received [%s].", epsilon)); @@ -284,7 +284,7 @@ class FusedMultiTransformerINT8OpMaker .AddCustomChecker([](const float &drop_p) { PADDLE_ENFORCE_EQ(drop_p >= 0.0f && drop_p <= 1.0f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'dropout_rate' must be between 0.0 and 1.0.")); }); @@ -301,7 +301,7 @@ class FusedMultiTransformerINT8OpMaker PADDLE_ENFORCE_EQ( type == "downgrade_in_infer" || type == "upscale_in_train", true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "dropout_implementation can only be downgrade_in_infer or " "upscale_in_train")); }); diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu index a76e93f5cdcf5..5893024c0e958 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu +++ b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu @@ -106,19 +106,19 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { if (time_step) { PADDLE_ENFORCE_EQ(time_step->place(), platform::CPUPlace(), - 
platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The place of input(TimeStep) must be CPUPlace.")); // cache_seq_len int time_step_value = time_step->data()[0]; PADDLE_ENFORCE_GT(time_step_value, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of time_step must > 0, but now is %d", time_step_value)); PADDLE_ENFORCE_EQ( seq_len, 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "In decode stage, the seq_len of input must be 1, but now is %d", seq_len)); out_seq_len += time_step_value; @@ -668,4 +668,4 @@ PD_REGISTER_STRUCT_KERNEL(fused_multi_transformer_int8, ALL_LAYOUT, ops::FusedMultiTransformerINT8OpKernel, float, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cc b/paddle/fluid/operators/fused/fused_multi_transformer_op.cc index a8bd90c7da5d4..dc90eaa3e5306 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cc +++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cc @@ -66,23 +66,23 @@ class FusedMultiTransformerOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dim.size(), 3, - platform::errors::InvalidArgument("The dimensions of x must be 3" - "(batch_size, seq_len, dim_embed)," - "but received dimensions of" - "Input is [%d]", - x_dim.size())); - PADDLE_ENFORCE_EQ(y_dim.size(), - 4, - platform::errors::InvalidArgument( - "The dimensions of qkv_weight must be 4" - "(3, num_head, dim_head, dim_embed)," - "but received dimensions of" - "Input is [%d]", - y_dim.size())); + phi::errors::InvalidArgument("The dimensions of x must be 3" + "(batch_size, seq_len, dim_embed)," + "but received dimensions of" + "Input is [%d]", + x_dim.size())); + PADDLE_ENFORCE_EQ( + y_dim.size(), + 4, + phi::errors::InvalidArgument("The dimensions of qkv_weight must be 4" + "(3, num_head, dim_head, dim_embed)," + "but received dimensions of" + "Input is [%d]", + y_dim.size())); 
PADDLE_ENFORCE_EQ( x_dim[2], trans_qkvw ? y_dim[3] : y_dim[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the dimension of x_dim[2] and y_dim[3](trans_qkvw is " "true) or y_dim[0](trans_qkvw is false)" "must be equal. But received: the shape " @@ -99,30 +99,30 @@ class FusedMultiTransformerOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( c_dim.size(), 5, - paddle::platform::errors::InvalidArgument( - "The CacheKV must be 5 dims, but got %d", c_dim.size())); + phi::errors::InvalidArgument("The CacheKV must be 5 dims, but got %d", + c_dim.size())); PADDLE_ENFORCE_EQ(c_dim[0], 2, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dim of CacheKV must be 2, but got %d", c_dim[0])); // 2 PADDLE_ENFORCE_EQ(c_dim[1], x_dim[0], - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dim of CacheKV must be equal with " "batch size %d, but got %d", x_dim[0], c_dim[1])); // batch_size PADDLE_ENFORCE_EQ(c_dim[2], trans_qkvw ? y_dim[1] : y_dim[2], - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The third dim of CacheKV must be equal with num " "head %d, but got %d", trans_qkvw ? y_dim[1] : y_dim[2], c_dim[2])); // num_head PADDLE_ENFORCE_EQ(c_dim[4], trans_qkvw ? y_dim[2] : y_dim[3], - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The fifth dim of CacheKV must be equal with head " "size %d, but got %d", trans_qkvw ? 
y_dim[2] : y_dim[3], @@ -223,7 +223,7 @@ class FusedMultiTransformerOpOpMaker PADDLE_ENFORCE_EQ( rotary_emb_dims >= 0 && rotary_emb_dims <= 2, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'rotary_emb_dims' in Op(Rotray) should be between" "0 and 2, But received [%s].", rotary_emb_dims)); @@ -234,7 +234,7 @@ class FusedMultiTransformerOpOpMaker .AddCustomChecker([](const float &epsilon) { PADDLE_ENFORCE_EQ(epsilon >= 0.0f && epsilon <= 0.001f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'epsilon' in Op(LayerNorm) should be between" "0.0 and 0.001, But received [%s].", epsilon)); @@ -245,7 +245,7 @@ class FusedMultiTransformerOpOpMaker .AddCustomChecker([](const float &drop_p) { PADDLE_ENFORCE_EQ(drop_p >= 0.0f && drop_p <= 1.0f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'dropout_rate' must be between 0.0 and 1.0.")); }); @@ -262,7 +262,7 @@ class FusedMultiTransformerOpOpMaker PADDLE_ENFORCE_EQ( type == "downgrade_in_infer" || type == "upscale_in_train", true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "dropout_implementation can only be downgrade_in_infer or " "upscale_in_train")); }); @@ -272,7 +272,7 @@ class FusedMultiTransformerOpOpMaker PADDLE_ENFORCE_EQ( act_type == "gelu" || act_type == "relu" || act_type == "none", true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only support `gelu`, `relu`, `none` activation in " "FusedMultiTransformer. 
")); }); diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h index 415a6ba1ffdf3..4bf467e9caf8f 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h +++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h @@ -131,7 +131,7 @@ static void AllReduce(phi::DenseTensor &tensor, // NOLINT namespace { // NOLINT namespace plat = paddle::platform; -using float16 = plat::float16; +using float16 = phi::dtype::float16; #define MMHA_USE_FP32_ACUM_FOR_LOGITS #define MMHA_USE_FP32_ACUM_FOR_OUT @@ -746,9 +746,9 @@ inline __device__ void convert_from_float(float4 &dst, float4 src) { // NOLINT dst = src; } -inline __device__ void convert_from_float(plat::float16 &dst, // NOLINT +inline __device__ void convert_from_float(phi::dtype::float16 &dst, // NOLINT float src) { - dst = static_cast(src); + dst = static_cast(src); } inline __device__ void convert_from_float(uint4 &dst, Float8_ src) { // NOLINT diff --git a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc index 3dbba2bf42ce4..93ac8f4e220c9 100644 --- a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc +++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc @@ -25,24 +25,23 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( ctx->Inputs("X").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(X) of FusedSeqpoolCVMOp should not be empty.")); PADDLE_ENFORCE_GE( ctx->Outputs("Out").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Outputs(Out) of FusedSeqpoolCVMOp should not be empty.")); auto cvm_dims = ctx->GetInputDim("CVM"); PADDLE_ENFORCE_EQ( cvm_dims.size(), 2UL, - platform::errors::InvalidArgument("Input(CVM)'s rank should be 2.")); - PADDLE_ENFORCE_EQ( - cvm_dims[1], - 2UL, - platform::errors::InvalidArgument("The 2nd dimension of " - 
"Input(CVM) should be 2.")); + phi::errors::InvalidArgument("Input(CVM)'s rank should be 2.")); + PADDLE_ENFORCE_EQ(cvm_dims[1], + 2UL, + phi::errors::InvalidArgument("The 2nd dimension of " + "Input(CVM) should be 2.")); auto ins_dims = ctx->GetInputsDim("X"); const int cvm_offset = ctx->Attrs().Get("cvm_offset"); @@ -53,7 +52,7 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT(num_inputs, 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input tensors count should be greater than 0, " "but received value is %d.", num_inputs)); @@ -62,7 +61,7 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel { // since input lod is not accessible here. PADDLE_ENFORCE_EQ(ins_dims[0].size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dims size of first input should be equal to 2, " "but received value is %d.", ins_dims[0].size())); @@ -88,7 +87,7 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel { } else { PADDLE_ENFORCE_EQ(batch_size, cur_batch_size, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The batch size of all input should be same, " "please check, last batch_size is %d, current " "batch_size is %d", @@ -111,7 +110,7 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( dims[rank - 1], 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Shape error in %lu id, the last dimension(embedding) of the " "'X' tensor must be larger than 2.", i)); @@ -145,7 +144,7 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel { } PADDLE_ENFORCE_EQ(flag, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "All Inputs of fused_seqpool_cvm OP are Empty!")); return phi::KernelKey(input_data_type, ctx.GetPlace()); // return phi::KernelKey(framework::proto::VarType::FP32, @@ -201,13 +200,13 @@ class FusedSeqpoolCVMGradOp : public framework::OperatorWithKernel 
{ PADDLE_ENFORCE_EQ( cvm_dims.size(), 2, - platform::errors::InvalidArgument("Input(CVM)'s rank should be 2.")); + phi::errors::InvalidArgument("Input(CVM)'s rank should be 2.")); for (size_t i = 0; i < og_dims.size(); i++) { PADDLE_ENFORCE_EQ( og_dims[i].size(), x_dims[i].size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of output grad must equal to Input(X). But " "received: input rank %u, input shape [%s].", og_dims[i].size(), @@ -217,7 +216,7 @@ class FusedSeqpoolCVMGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( o_dim, x_dims[i][og_dims[i].size() - 1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension mismatch between Input(OUT@GRAD) and " "Input(X). Received Input(OUT@GRAD): input rank %u, " "input shape [%s]; received Input(X): input rank %u, " @@ -230,7 +229,7 @@ class FusedSeqpoolCVMGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( og_dims[i][og_dims[i].size() - 1], x_dims[i][og_dims[i].size() - 1] - cvm_offset, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension mismatch between Input(OUT@GRAD) and " "Input(X). 
Received Input(OUT@GRAD): input rank %u, " "input shape [%s]; received Input(X): input rank %u, " diff --git a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu index 362860aa23bdf..df00c74a30237 100644 --- a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu +++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu @@ -463,7 +463,7 @@ class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel { } else { PADDLE_ENFORCE_EQ(batch_size, cur_batch_size, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The batch size of all input should be same, " "please cheack, last batchsize is %d, current " "batchsize is %d", @@ -550,7 +550,7 @@ class FusedSeqpoolCVMGradCUDAKernel : public framework::OpKernel { } else { PADDLE_ENFORCE_EQ(batch_size, cur_batch_size, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The batch size of all input should be same, " "please cheack, last batchsize is %d, current " "batchsize is %d", diff --git a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.h b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.h index dcc76bbf95254..bd4475da0b8ea 100644 --- a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.h +++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.h @@ -27,7 +27,7 @@ template class FusedSeqpoolCVMOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unimplemented CPU kernel for FusedSeqpoolCVMOp, only support GPU " "now.")); } @@ -37,7 +37,7 @@ template class FusedSeqpoolCVMGradOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unimplemented CPU kernel for FusedSeqpoolCVMGradOp, only support GPU " "now.")); 
} diff --git a/paddle/fluid/operators/fused/fused_softmax_mask.cu.h b/paddle/fluid/operators/fused/fused_softmax_mask.cu.h index 12e511fe3aef9..cb3292a60ebd2 100644 --- a/paddle/fluid/operators/fused/fused_softmax_mask.cu.h +++ b/paddle/fluid/operators/fused/fused_softmax_mask.cu.h @@ -123,9 +123,10 @@ __global__ void FusedSoftmaxMaskVecKernel(T* dst, // #define SELECT_SOFTMAX_MASK_KERNEL(ELEMENTS) \ // do { \ // if (sizeof(T) == 2 && seq_len % 8 == 0) { \ -// FusedSoftmaxMaskVecKernel \ +// FusedSoftmaxMaskVecKernel \ // <<>>( \ -// (plat::float16*)dst, (const plat::float16*)src, mask, seq_len); \ +// (phi::dtype::float16*)dst, (const phi::dtype::float16*)src, mask, +// seq_len); \ // } \ // else if (seq_len % 4 == 0) SOFTMAX_MASK_KERNEL(4, ELEMENTS); \ // else if (seq_len % 2 == 0) SOFTMAX_MASK_KERNEL(2, ELEMENTS); \ @@ -159,9 +160,9 @@ void LaunchFusedSoftmaxMaskKernel(const T* src, PADDLE_ENFORCE_EQ( seq_len > 0 && seq_len <= 4096, true, - platform::errors::InvalidArgument("seq_len must be between (0, 4096] " - "received the seq_len is %d", - seq_len)); + phi::errors::InvalidArgument("seq_len must be between (0, 4096] " + "received the seq_len is %d", + seq_len)); constexpr int block_size = 128; constexpr int warp_size = 32; @@ -196,7 +197,7 @@ void LaunchFusedSoftmaxMaskKernel(const T* src, CASE_SOFTMAX_MASK_KERNEL(64); // <=2048 CASE_SOFTMAX_MASK_KERNEL(128); // <=4096 default: - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "seq_len must be between (0, 4096], received the seq_len is %d", seq_len)); } diff --git a/paddle/fluid/operators/fused/fusion_conv_inception_op.cc b/paddle/fluid/operators/fused/fusion_conv_inception_op.cc index 41a9299f7258c..6e1536e9934a2 100644 --- a/paddle/fluid/operators/fused/fusion_conv_inception_op.cc +++ b/paddle/fluid/operators/fused/fusion_conv_inception_op.cc @@ -33,21 +33,21 @@ class ConvInceptionFusionOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( 
in_dims.size(), 4, - platform::errors::InvalidArgument("Conv intput should be 4-D tensor.")); + phi::errors::InvalidArgument("Conv intput should be 4-D tensor.")); PADDLE_ENFORCE_EQ( w_dims.size(), 4, - platform::errors::InvalidArgument("There should be 4 filters.")); + phi::errors::InvalidArgument("There should be 4 filters.")); PADDLE_ENFORCE_EQ(w_dims[0][1], in_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Invalid filter channel number %d, which should be " "equal to input channel number %d.", w_dims[0][1], in_dims[1])); PADDLE_ENFORCE_EQ(w_dims[1][1], in_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Invalid filter channel number %d, which should be " "equal to input channel number %d.", w_dims[1][1], diff --git a/paddle/fluid/operators/fused/fusion_lstm_op.cc b/paddle/fluid/operators/fused/fusion_lstm_op.cc index 1c3b37d12d689..dc3a223d745b3 100644 --- a/paddle/fluid/operators/fused/fusion_lstm_op.cc +++ b/paddle/fluid/operators/fused/fusion_lstm_op.cc @@ -36,7 +36,7 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ(x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X)'s rank must be 2, but received x's rank " "is:%d, x dim is:[%s]", x_dims.size(), @@ -48,7 +48,7 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { auto c_dims = ctx->GetInputDim("C0"); PADDLE_ENFORCE_EQ(h_dims, c_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension of Input(H0) and Input(C0) should be " "same, but received h0 dims is:[%s], c0 dims is:[%s]", h_dims, @@ -58,14 +58,14 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { auto wx_dims = ctx->GetInputDim("WeightX"); PADDLE_ENFORCE_EQ(wx_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(WeightX) should be 2, but 
received " "WeightX's rank is:%d, WeightX dim is:[%s]", wx_dims.size(), wx_dims)); PADDLE_ENFORCE_EQ(wx_dims[0], x_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(WeightX) " "should equal to second dimension of Input(X), but " "received WeightX first dim is:%d, X second dim is:%d", @@ -77,14 +77,14 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ(wh_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(WeightH) should be 2, but received " "WeightH rank is:%d, WeightH dim is:[%s]", wh_dims.size(), wh_dims)); PADDLE_ENFORCE_EQ(wh_dims[0], frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(WeightH) " "should equal to frame size, but received WeightH " "first dim is:%d, frame size is:%d.", @@ -93,7 +93,7 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ(wh_dims[1], 4 * frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(WeightH) " "should equal to 4 * frame_size, but received WeightH " "second dimension is:%d, frame size is:%d.", @@ -103,14 +103,14 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { auto b_dims = ctx->GetInputDim("Bias"); PADDLE_ENFORCE_EQ(b_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(Bias) should be 2, but received " "Bias rank is:%d, Bias dim is:[%s]", b_dims.size(), b_dims)); PADDLE_ENFORCE_EQ(b_dims[0], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(Bias) should be 1, but " "received Bias's dimension is:[%s]", b_dims)); @@ -118,7 +118,7 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { if (ctx->Attrs().Get("use_peepholes")) { PADDLE_ENFORCE_EQ(b_dims[1], 7 * frame_size, - 
platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Bias) should be " "7 * %d if enable peepholes connection, but received " "Bias dim is:[%s]", @@ -129,7 +129,7 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( b_dims[1], 4 * frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Bias) should be " "4 * %d if disable peepholes, but received Bias dim is:[%s]", frame_size, diff --git a/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc b/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc index b489f5e458bc1..725eb2682e1a2 100644 --- a/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc +++ b/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc @@ -26,24 +26,24 @@ void FusionSeqPoolConcatOp::InferShape( framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_GE(ctx->Inputs("X").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(X) of FusionSeqPoolConcatOp should be greater " "than 1, but received value is %d.", ctx->Inputs("X").size())); OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "FusionSeqPoolConcat"); int axis = ctx->Attrs().Get("axis"); - PADDLE_ENFORCE_EQ(axis, - 1, - platform::errors::InvalidArgument( - "FusionSeqPoolConcatOp only supports concat " - "axis=1 yet, but received axis value is %d", - axis)); + PADDLE_ENFORCE_EQ( + axis, + 1, + phi::errors::InvalidArgument("FusionSeqPoolConcatOp only supports concat " + "axis=1 yet, but received axis value is %d", + axis)); auto ins_dims = ctx->GetInputsDim("X"); const size_t n = ins_dims.size(); PADDLE_ENFORCE_GT(n, 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input tensors count should be greater than 0, " "but received value is %d.", n)); @@ -55,7 +55,7 @@ void FusionSeqPoolConcatOp::InferShape( // since input lod is not accessible here. 
PADDLE_ENFORCE_EQ(ins_dims[0].size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dims size of first input should be equal to 2, " "but received value is %d.", ins_dims[0].size())); @@ -116,7 +116,7 @@ class FusionSeqPoolConcatKernel : public framework::OpKernel { int w = static_cast(ins[0]->numel() / x0_dims[0]); PADDLE_ENFORCE_EQ(y_dims[1] % w, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The output of dims[1] should be dividable of w, but " "dims[1] is %d, w is %d.", y_dims[1], @@ -140,7 +140,7 @@ class FusionSeqPoolConcatKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( static_cast(ins[i]->numel() / x_dims[0]), w, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Width of all inputs should be equal, but the width of the %d-th " "input %d is not equal to the previous %d", i, @@ -149,7 +149,7 @@ class FusionSeqPoolConcatKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( x_lod.size(), bs + 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Batchsize of all inputs should be equal, but the value of the " "%d-th %d is not equal to the previous %d.", i, diff --git a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc index 5bcd4d2fbc75a..352d427b8ab91 100644 --- a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc +++ b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc @@ -27,33 +27,31 @@ void FusionSeqPoolCVMConcatOp::InferShape( PADDLE_ENFORCE_GE( ctx->Inputs("X").size(), 1UL, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(X) of FusionSeqPoolCVMConcatOp should not be empty.")); PADDLE_ENFORCE( ctx->HasOutput("Out"), - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of FusionSeqPoolCVMConcatOp should not be null.")); int axis = ctx->Attrs().Get("axis"); - PADDLE_ENFORCE_EQ(axis, - 1, - 
paddle::platform::errors::InvalidArgument( - "FusionSeqPoolCVMConcatOp only supports " - "concat axis=1 yet, but received %d.", - axis)); + PADDLE_ENFORCE_EQ( + axis, + 1, + phi::errors::InvalidArgument("FusionSeqPoolCVMConcatOp only supports " + "concat axis=1 yet, but received %d.", + axis)); bool use_cvm = ctx->Attrs().Get("use_cvm"); - PADDLE_ENFORCE_EQ(use_cvm, - true, - paddle::platform::errors::InvalidArgument( - "FusionSeqPoolCVMConcatOp only supports " - "use_cvm is true yet, but received %d.", - use_cvm)); + PADDLE_ENFORCE_EQ( + use_cvm, + true, + phi::errors::InvalidArgument("FusionSeqPoolCVMConcatOp only supports " + "use_cvm is true yet, but received %d.", + use_cvm)); auto ins_dims = ctx->GetInputsDim("X"); const size_t n = ins_dims.size(); - PADDLE_ENFORCE_GT(n, - 0UL, - paddle::platform::errors::InvalidArgument( - "Input tensors count should > 0.")); + PADDLE_ENFORCE_GT( + n, 0UL, phi::errors::InvalidArgument("Input tensors count should > 0.")); if (n == 1) { LOG(WARNING) << "Only have one input, may waste memory"; } @@ -62,7 +60,7 @@ void FusionSeqPoolCVMConcatOp::InferShape( // since input lod is not accessible here. 
PADDLE_ENFORCE_EQ(ins_dims[0].size(), 2, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dims size of first input should be 2.")); ctx->SetOutputDim("Out", {-1, ins_dims[0][axis] * static_cast(n)}); } @@ -120,7 +118,7 @@ class FusionSeqPoolCVMConcatKernel : public framework::OpKernel { int w = static_cast(ins[0]->numel() / x0_dims[0]); PADDLE_ENFORCE_EQ(y_dims[1] % w, 0, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The output of dims[1] should be dividable of w")); phi::jit::seq_pool_attr_t attr(w, phi::jit::SeqPoolType::kSum); if (pooltype == "AVERAGE") { @@ -138,13 +136,13 @@ class FusionSeqPoolCVMConcatKernel : public framework::OpKernel { auto x_lod = ins[i]->lod()[0]; const T* src = ins[i]->data(); T* dst = y_data + i * w; - PADDLE_ENFORCE_EQ(static_cast(ins[i]->numel() / x_dims[0]), - w, - paddle::platform::errors::InvalidArgument( - "Width of all inputs should be equal.")); + PADDLE_ENFORCE_EQ( + static_cast(ins[i]->numel() / x_dims[0]), + w, + phi::errors::InvalidArgument("Width of all inputs should be equal.")); PADDLE_ENFORCE_EQ(x_lod.size(), bs + 1, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Batchsize of all inputs should be equal.")); for (size_t j = 0; j < bs; ++j) { attr.h = static_cast(x_lod[j + 1] - x_lod[j]); diff --git a/paddle/fluid/operators/fused/multi_gru_op.cc b/paddle/fluid/operators/fused/multi_gru_op.cc index df8e8c956a045..7011dfebb6719 100644 --- a/paddle/fluid/operators/fused/multi_gru_op.cc +++ b/paddle/fluid/operators/fused/multi_gru_op.cc @@ -37,10 +37,10 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( x_mat_dims.size(), 2, - platform::errors::InvalidArgument("The size of input X dims should be 2, " - "or 3 with second dimension equal to " - "1, but now Input X dim is:[%s] ", - x_dims)); + phi::errors::InvalidArgument("The size of input X dims should be 2, " + "or 3 with second dimension 
equal to " + "1, but now Input X dim is:[%s] ", + x_dims)); auto layers = ctx->Attrs().Get("layers"); auto wx_dims = ctx->GetInputsDim("WeightX"); @@ -48,7 +48,7 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( wx_dims[i][0], x_mat_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of flattened WeightX #%d" "should equal to last dimension of flattened input X, but " "received fattened WeightX dimension is:%d, flattened X dimension " @@ -62,7 +62,7 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const { for (int i = 0; i < 2 * layers; ++i) { PADDLE_ENFORCE_EQ(wx_dims[i].size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of WeightX #%d should be 2, but received " "WeightX dim size is:%d, WeightX dim is:[%s] ", i, @@ -70,7 +70,7 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const { wx_dims[i])); PADDLE_ENFORCE_EQ(wh_dims[i].size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of WeightH #%d should be 2, but received " "WeightH dim size is:%d, WeightH dim is:[%s] ", i, @@ -80,7 +80,7 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( wh_dims[i][1], 3 * frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of WeightH #%d " "should equal to 3 * frame_size, but received WeightH's " "second dimension is: %d, frame size is:%d", @@ -90,7 +90,7 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( wx_dims[i][1], 3 * frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of WeightX #%d " "should equal to 3 * frame_size, but received WeightX's " "second dimension is: %d, frame size is:%d", @@ -105,7 +105,7 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const { int frame_size = 
static_cast(wh_dims[i][0]); PADDLE_ENFORCE_EQ(b_dims[i].size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Bias #%d should be 2, but received " "Bias rank is:%d, Bias dim is:[%s]", i, @@ -113,7 +113,7 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const { b_dims[i])); PADDLE_ENFORCE_EQ(b_dims[i][0], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Bias #%d should be 1, but " "received Bias first dim is:%d, Bias dim is:[%s]", i, @@ -122,7 +122,7 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( b_dims[i][1], frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Bias #%d must be [1, frame_size * 3], but " "received bias dim is:[%s], frame size is:%d", i, diff --git a/paddle/fluid/operators/fused/onednn/fusion_lstm_onednn_op.cc b/paddle/fluid/operators/fused/onednn/fusion_lstm_onednn_op.cc index c85022e08bcc7..68c73f7d3500b 100644 --- a/paddle/fluid/operators/fused/onednn/fusion_lstm_onednn_op.cc +++ b/paddle/fluid/operators/fused/onednn/fusion_lstm_onednn_op.cc @@ -66,17 +66,17 @@ class LSTMMKLDNNHandler PADDLE_ENFORCE_EQ( ctx.Attr("gate_activation"), "sigmoid", - platform::errors::Unimplemented("oneDNN fusion_lstm supports only " - "sigmoid as a gate activation.")); + phi::errors::Unimplemented("oneDNN fusion_lstm supports only " + "sigmoid as a gate activation.")); PADDLE_ENFORCE_EQ( ctx.Attr("cell_activation"), "tanh", - platform::errors::Unimplemented( + phi::errors::Unimplemented( "oneDNN fusion_lstm supports only tanh as a cell activation.")); PADDLE_ENFORCE_EQ( ctx.Attr("candidate_activation"), "tanh", - platform::errors::Unimplemented( + phi::errors::Unimplemented( "oneDNN fusion_lstm supports only tanh a candidate activation.")); // Weights for int8 kernel are of a type s8 @@ -325,7 +325,7 @@ template class FusionLSTMMKLDNNKernel : public framework::OpKernel { 
public: void Compute(const framework::ExecutionContext& ctx) const override { - const bool is_bf16 = std::is_same::value; + const bool is_bf16 = std::is_same::value; const bool force_fp32_output = ctx.Attr("force_fp32_output"); // BF16 does not support force output @@ -407,14 +407,11 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel { handler.template AcquireWeightHMemory(weight_h); } else if (framework::TransToProtoVarType(weight_h->dtype()) == paddle::framework::proto::VarType_Type_BF16) { - h0_memory_p = - handler.template AcquireH0Memory(h0); + h0_memory_p = handler.template AcquireH0Memory(h0); weight_x_memory_p = - handler.template AcquireWeightXMemory( - weight_x); + handler.template AcquireWeightXMemory(weight_x); weight_h_memory_p = - handler.template AcquireWeightHMemory( - weight_h); + handler.template AcquireWeightHMemory(weight_h); } else { h0_memory_p = handler.template AcquireH0Memory(h0); weight_x_memory_p = @@ -478,4 +475,4 @@ PD_REGISTER_STRUCT_KERNEL(fusion_lstm, ops::FusionLSTMMKLDNNKernel, float, uint8_t, - paddle::platform::bfloat16) {} + phi::dtype::bfloat16) {} diff --git a/paddle/fluid/operators/fused/onednn/multi_gru_onednn_op.cc b/paddle/fluid/operators/fused/onednn/multi_gru_onednn_op.cc index 8e11c91a117d1..c9545876a0dc6 100644 --- a/paddle/fluid/operators/fused/onednn/multi_gru_onednn_op.cc +++ b/paddle/fluid/operators/fused/onednn/multi_gru_onednn_op.cc @@ -69,29 +69,29 @@ class MultiGRUHandler { PADDLE_ENFORCE_EQ( weights_x_.size(), layers_ * 2, - platform::errors::InvalidArgument("The number of WeightX inputs does " - "not match the number of layers.")); + phi::errors::InvalidArgument("The number of WeightX inputs does " + "not match the number of layers.")); PADDLE_ENFORCE_EQ( weights_h_.size(), layers_ * 2, - platform::errors::InvalidArgument("The number of WeightH inputs does " - "not match the number of layers.")); + phi::errors::InvalidArgument("The number of WeightH inputs does " + "not match the number of 
layers.")); if (!biases_.empty()) PADDLE_ENFORCE_EQ( biases_.size(), layers_ * 2, - platform::errors::InvalidArgument("The number of Bias inputs does " - "not match the number of layers.")); + phi::errors::InvalidArgument("The number of Bias inputs does " + "not match the number of layers.")); // oneDNN kernel has hardcoded activation functions PADDLE_ENFORCE_EQ( ctx.Attr("gate_activation"), "sigmoid", - platform::errors::Unimplemented( + phi::errors::Unimplemented( "oneDNN fusion_gru supports only sigmoid as a gate activation.")); PADDLE_ENFORCE_EQ( ctx.Attr("activation"), "tanh", - platform::errors::Unimplemented( + phi::errors::Unimplemented( "oneDNN fusion_gru supports only tanh as an activation.")); N_ = x_lod_.size() - 1; // Number of sentences (batches) @@ -134,7 +134,7 @@ class MultiGRUHandler { PADDLE_ENFORCE_EQ( scale_weights.size(), layers_ * 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of weight scale inputs does " "not match the number of layers. Expected: %d. Actual: %d", layers_ * 2, @@ -212,7 +212,7 @@ class MultiGRUHandler { attrs_[2 * layer + (dir == R2L)]); PADDLE_ENFORCE_NOT_NULL( pd, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Primitive descriptor for gru_forward cannot be null.")); dev_ctx_.SetBlob(pd_key, pd); } diff --git a/paddle/fluid/operators/fused/quant_dequant_kernel.h b/paddle/fluid/operators/fused/quant_dequant_kernel.h index 8e8fdc95e91b5..63dbee42d6e7a 100644 --- a/paddle/fluid/operators/fused/quant_dequant_kernel.h +++ b/paddle/fluid/operators/fused/quant_dequant_kernel.h @@ -17,8 +17,8 @@ limitations under the License. 
*/ #include #include "paddle/fluid/operators/fake_quantize_op.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" -#include "paddle/fluid/platform/float16.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/kernels/funcs/aligned_vector.h" namespace paddle { diff --git a/paddle/fluid/operators/fused/resnet_basic_block_op.cc b/paddle/fluid/operators/fused/resnet_basic_block_op.cc index 58125a9b7f674..37315367189fa 100644 --- a/paddle/fluid/operators/fused/resnet_basic_block_op.cc +++ b/paddle/fluid/operators/fused/resnet_basic_block_op.cc @@ -112,29 +112,29 @@ class ResNetBasicBlockOp : public framework::OperatorWithKernel { // make sure Mean/RunningMean and Var/RunningVar share memory PADDLE_ENFORCE_EQ(ctx->Inputs("Mean1")[0], ctx->Outputs("Mean1Out")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Mean1 and Mean1Out should share the same memory")); PADDLE_ENFORCE_EQ(ctx->Inputs("Var1")[0], ctx->Outputs("Var1Out")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Var1 and Var1Out should share the same memory")); PADDLE_ENFORCE_EQ(ctx->Inputs("Mean2")[0], ctx->Outputs("Mean2Out")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Mean2 and Mean2Out should share the same memory")); PADDLE_ENFORCE_EQ(ctx->Inputs("Var2")[0], ctx->Outputs("Var2Out")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Var2 and Var2Out should share the same memory")); if (has_shortcut) { PADDLE_ENFORCE_EQ(ctx->Inputs("Mean3")[0], ctx->Outputs("Mean3Out")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Mean3 and Mean3Out should share the same memory")); PADDLE_ENFORCE_EQ(ctx->Inputs("Var3")[0], ctx->Outputs("Var3Out")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Var3 and Var3Out should share the same memory")); } @@ -143,10 +143,10 @@ class ResNetBasicBlockOp : public 
framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( data_format, "NCHW", - platform::errors::InvalidArgument("The data format must equal to NCHW. " - "But received: the data format " - "= [%s]", - data_format)); + phi::errors::InvalidArgument("The data format must equal to NCHW. " + "But received: the data format " + "= [%s]", + data_format)); int stride1 = ctx->Attrs().Get("stride1"); int stride2 = ctx->Attrs().Get("stride2"); int padding1 = ctx->Attrs().Get("padding1"); @@ -158,13 +158,13 @@ class ResNetBasicBlockOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x1_dims.size(), 4, - platform::errors::InvalidArgument("The dimensions of input " - "must equal to 4." - "But received: the shape of input " - "= [%s], the dimension of input = " - "[%d]", - x1_dims, - x1_dims.size())); + phi::errors::InvalidArgument("The dimensions of input " + "must equal to 4." + "But received: the shape of input " + "= [%s], the dimension of input = " + "[%d]", + x1_dims, + x1_dims.size())); // Calculate the dims of output1 int batch = x1_dims[0]; @@ -226,26 +226,26 @@ class ResNetBasicBlockOp : public framework::OperatorWithKernel { // By default, the type of the scale, bias, mean, // and var tensors should be float when input tensor's dtype is float16. 
auto bn_param_type = framework::proto::VarType::FP32; - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Scale1")->dtype()), - platform::errors::InvalidArgument( - "Scale input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Bias1")->dtype()), - platform::errors::InvalidArgument( - "Bias input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Scale2")->dtype()), - platform::errors::InvalidArgument( - "Scale input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Bias2")->dtype()), - platform::errors::InvalidArgument( - "Bias input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Scale1")->dtype()), + phi::errors::InvalidArgument("Scale input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Bias1")->dtype()), + phi::errors::InvalidArgument("Bias input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Scale2")->dtype()), + phi::errors::InvalidArgument("Scale input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Bias2")->dtype()), + phi::errors::InvalidArgument("Bias input should be of float type")); return phi::KernelKey(input_data_type, ctx.GetPlace()); } @@ -546,8 +546,7 @@ class ResNetBasicBlockGradOp : public framework::OperatorWithKernel { const framework::ExecutionContext& ctx) const { PADDLE_ENFORCE_NOT_NULL( ctx.InputVar(framework::GradVarName("Y")), - platform::errors::NotFound( - "Can not find Y@GRAD in the execution context.")); + phi::errors::NotFound("Can not find Y@GRAD in the execution context.")); return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); diff 
--git a/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc b/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc index 16e2261f1afb5..50a3b3c46137d 100644 --- a/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc +++ b/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc @@ -298,10 +298,9 @@ class ResNetBasicBlockXPUKernel : public framework::OpKernel { using XPUType = typename XPUTypeTrait::Type; void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ( - platform::is_xpu_place(ctx.GetPlace()), - true, - platform::errors::PreconditionNotMet("It must use XPUPlace.")); + PADDLE_ENFORCE_EQ(platform::is_xpu_place(ctx.GetPlace()), + true, + phi::errors::PreconditionNotMet("It must use XPUPlace.")); // input const phi::DenseTensor* x = ctx.Input("X"); @@ -704,10 +703,9 @@ class ResNetBasicBlockGradXPUKernel : public framework::OpKernel { using XPUType = typename XPUTypeTrait::Type; void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ( - platform::is_xpu_place(ctx.GetPlace()), - true, - platform::errors::PreconditionNotMet("It must use XPUPlace.")); + PADDLE_ENFORCE_EQ(platform::is_xpu_place(ctx.GetPlace()), + true, + phi::errors::PreconditionNotMet("It must use XPUPlace.")); const phi::DenseTensor* y_grad = ctx.Input(framework::GradVarName("Y")); diff --git a/paddle/fluid/operators/fused/resnet_unit_op.cc b/paddle/fluid/operators/fused/resnet_unit_op.cc index 5827cd3427dee..d4e9b3f8e4525 100644 --- a/paddle/fluid/operators/fused/resnet_unit_op.cc +++ b/paddle/fluid/operators/fused/resnet_unit_op.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace paddle { namespace operators { @@ -101,22 +101,22 @@ class ResNetUnitOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->Inputs("MeanX")[0], ctx->Outputs("RunningMeanX")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "MeanX and RunningMeanX should share the same memory")); PADDLE_ENFORCE_EQ(ctx->Inputs("VarX")[0], ctx->Outputs("RunningVarX")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "VarX and RunningVarX should share the same memory")); if (has_shortcut) { PADDLE_ENFORCE_EQ( ctx->Inputs("MeanZ")[0], ctx->Outputs("RunningMeanZ")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "MeanZ and RunningMeanZ should share the same memory")); PADDLE_ENFORCE_EQ( ctx->Inputs("VarZ")[0], ctx->Outputs("RunningVarZ")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "VarZ and RunningVarZ should share the same memory")); } @@ -132,25 +132,25 @@ class ResNetUnitOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 4, - platform::errors::InvalidArgument("The dimensions of input " - "must equal to 4." - "But received: the shape of input " - "= [%s], the dimension of input = " - "[%d]", - x_dims, - x_dims.size())); - PADDLE_ENFORCE_EQ(w_dims.size(), - 4, - platform::errors::InvalidArgument( - "The dimensions of filter " - "must equal to 4." - "But received: the shape of filter " - "= [%s], the dimension of filter = [%d] ", - w_dims, - w_dims.size())); + phi::errors::InvalidArgument("The dimensions of input " + "must equal to 4." + "But received: the shape of input " + "= [%s], the dimension of input = " + "[%d]", + x_dims, + x_dims.size())); + PADDLE_ENFORCE_EQ( + w_dims.size(), + 4, + phi::errors::InvalidArgument("The dimensions of filter " + "must equal to 4." 
+ "But received: the shape of filter " + "= [%s], the dimension of filter = [%d] ", + w_dims, + w_dims.size())); PADDLE_ENFORCE_EQ(bn_param_dims.size(), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of bn param " "must equal to 4." "But received: the shape of bn param " @@ -208,16 +208,16 @@ class ResNetUnitOp : public framework::OperatorWithKernel { // and var tensors should be float when input tensor's dtype is float16. auto bn_param_type = framework::proto::VarType::FP32; - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("ScaleX")->dtype()), - platform::errors::InvalidArgument( - "Scale input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("BiasX")->dtype()), - platform::errors::InvalidArgument( - "Bias input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("ScaleX")->dtype()), + phi::errors::InvalidArgument("Scale input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("BiasX")->dtype()), + phi::errors::InvalidArgument("Bias input should be of float type")); return phi::KernelKey(input_data_type, ctx.GetPlace()); } }; @@ -394,8 +394,7 @@ class ResNetUnitGradOp : public framework::OperatorWithKernel { const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE_NOT_NULL( ctx.InputVar(framework::GradVarName("Y")), - platform::errors::NotFound( - "Can not find Y@GRAD in the execution context.")); + phi::errors::NotFound("Can not find Y@GRAD in the execution context.")); return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); diff --git a/paddle/fluid/operators/fused/resnet_unit_op.cu b/paddle/fluid/operators/fused/resnet_unit_op.cu index 5b126008bf654..6afe03a67ceab 100644 --- a/paddle/fluid/operators/fused/resnet_unit_op.cu +++ 
b/paddle/fluid/operators/fused/resnet_unit_op.cu @@ -18,7 +18,7 @@ limitations under the License. */ #include "paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h" #include "paddle/fluid/operators/fused/cudnn_norm_conv.cu.h" #include "paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace paddle { namespace operators { @@ -30,10 +30,10 @@ class ResNetUnitKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::PreconditionNotMet("It must use CUDAPlace.")); + phi::errors::PreconditionNotMet("It must use CUDAPlace.")); PADDLE_ENFORCE_EQ(platform::CudnnDataType::type, CUDNN_DATA_HALF, - platform::errors::Unavailable( + phi::errors::Unavailable( "ResNetUnitOp only supports float16 for now.")); // input x @@ -230,10 +230,10 @@ class ResNetUnitGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::PreconditionNotMet("It must use CUDAPlace.")); + phi::errors::PreconditionNotMet("It must use CUDAPlace.")); PADDLE_ENFORCE_EQ(platform::CudnnDataType::type, CUDNN_DATA_HALF, - platform::errors::Unavailable( + phi::errors::Unavailable( "ResNetUnitOp only supports float16 for now.")); const phi::DenseTensor *y_grad = @@ -420,10 +420,10 @@ class ResNetUnitGradKernel : public framework::OpKernel { namespace ops = paddle::operators; namespace plat = paddle::platform; PD_REGISTER_STRUCT_KERNEL( - resnet_unit, GPU, ALL_LAYOUT, ops::ResNetUnitKernel, plat::float16) {} + resnet_unit, GPU, ALL_LAYOUT, ops::ResNetUnitKernel, phi::dtype::float16) {} PD_REGISTER_STRUCT_KERNEL(resnet_unit_grad, GPU, ALL_LAYOUT, ops::ResNetUnitGradKernel, - plat::float16) {} + phi::dtype::float16) {} #endif diff --git a/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc b/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc index c00e58f8463ab..f50d452d6c285 100644 --- 
a/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc +++ b/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc @@ -14,7 +14,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device/device_wrapper.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace paddle { namespace operators { @@ -26,10 +26,9 @@ class ResNetUnitXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto place = ctx.GetPlace(); - PADDLE_ENFORCE_EQ( - platform::is_xpu_place(place), - true, - platform::errors::PreconditionNotMet("It must use XPUPlace.")); + PADDLE_ENFORCE_EQ(platform::is_xpu_place(place), + true, + phi::errors::PreconditionNotMet("It must use XPUPlace.")); bool is_nchw = (ctx.Attr("data_format") == "NCHW"); // input x @@ -188,10 +187,9 @@ class ResNetUnitGradXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto place = ctx.GetPlace(); - PADDLE_ENFORCE_EQ( - platform::is_xpu_place(place), - true, - platform::errors::PreconditionNotMet("It must use XPUPlace.")); + PADDLE_ENFORCE_EQ(platform::is_xpu_place(place), + true, + phi::errors::PreconditionNotMet("It must use XPUPlace.")); bool is_nchw = (ctx.Attr("data_format") == "NCHW"); const phi::DenseTensor *y_grad = @@ -365,11 +363,11 @@ PD_REGISTER_STRUCT_KERNEL(resnet_unit, XPU, ALL_LAYOUT, ops::ResNetUnitXPUKernel, - plat::float16, + phi::dtype::float16, float) {} PD_REGISTER_STRUCT_KERNEL(resnet_unit_grad, XPU, ALL_LAYOUT, ops::ResNetUnitGradXPUKernel, - plat::float16, + phi::dtype::float16, float) {} diff --git a/paddle/fluid/operators/fused/unity_build_rule.cmake b/paddle/fluid/operators/fused/unity_build_rule.cmake index b7405f93c3585..9ef1e53891d52 100644 --- a/paddle/fluid/operators/fused/unity_build_rule.cmake +++ b/paddle/fluid/operators/fused/unity_build_rule.cmake @@ -12,5 +12,5 @@ 
register_unity_group( fusion_lstm_op.cc fusion_seqpool_concat_op.cc multi_gru_op.cc - mkldnn/multi_gru_mkldnn_op.cc + onednn/multi_gru_onednn_op.cc fusion_seqpool_cvm_concat_op.cc) diff --git a/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc b/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc index 12c8ec9b81db1..851c448865363 100644 --- a/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc +++ b/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc @@ -30,9 +30,9 @@ class SoftmaxMaskFuseUpperTriangleOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 4, - platform::errors::InvalidArgument("Input x must be in 4D dimension but " - "received the dimension of X is %d", - x_dims.size())); + phi::errors::InvalidArgument("Input x must be in 4D dimension but " + "received the dimension of X is %d", + x_dims.size())); ctx->SetOutputDim("Out", x_dims); ctx->ShareLoD("X", "Out"); diff --git a/paddle/fluid/operators/fused_token_prune_op.cc b/paddle/fluid/operators/fused_token_prune_op.cc index 9fab5c8e7c48d..144e91be396de 100644 --- a/paddle/fluid/operators/fused_token_prune_op.cc +++ b/paddle/fluid/operators/fused_token_prune_op.cc @@ -107,59 +107,59 @@ class FusedTokenPruneOp : public framework::OperatorWithKernel { auto new_mask_dim = ctx->GetInputDim("NewMask"); // check input dims number - PADDLE_ENFORCE_EQ(mask_dim.size(), - 4, - platform::errors::InvalidArgument( - "The input mask must be 4-dimension")); - PADDLE_ENFORCE_EQ(attn_dim.size(), - 4, - platform::errors::InvalidArgument( - "The input attn must be 4-dimension")); + PADDLE_ENFORCE_EQ( + mask_dim.size(), + 4, + phi::errors::InvalidArgument("The input mask must be 4-dimension")); + PADDLE_ENFORCE_EQ( + attn_dim.size(), + 4, + phi::errors::InvalidArgument("The input attn must be 4-dimension")); PADDLE_ENFORCE_EQ( x_dim.size(), 3, - platform::errors::InvalidArgument("The input x must be 4-dimension")); - 
PADDLE_ENFORCE_EQ(new_mask_dim.size(), - 4, - platform::errors::InvalidArgument( - "The input attn must be 4-dimension")); + phi::errors::InvalidArgument("The input x must be 4-dimension")); + PADDLE_ENFORCE_EQ( + new_mask_dim.size(), + 4, + phi::errors::InvalidArgument("The input attn must be 4-dimension")); // check input dims relations PADDLE_ENFORCE_EQ(mask_dim[0], attn_dim[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dim of mask and attn should be the same" "which is batch size")); PADDLE_ENFORCE_EQ(mask_dim[1], attn_dim[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dim of mask and attn should be the same" "which is nb_head")); PADDLE_ENFORCE_EQ(mask_dim[0], x_dim[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dim of mask and x should be the same" "which is batch size")); PADDLE_ENFORCE_EQ( mask_dim[2], mask_dim[3], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The third dim and the fourth dim of mask should be the same" "which is max seq len")); PADDLE_ENFORCE_EQ( attn_dim[2], attn_dim[3], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The third dim and the fourth dim of mask should be the same" "which is max seq len")); PADDLE_ENFORCE_EQ(attn_dim[2], mask_dim[2], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The third dim of mask and attn should be the same" "which is max seq len")); PADDLE_ENFORCE_EQ(attn_dim[2], x_dim[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The third dim of mask and the second dim of attn" "should be the same which is max seq len")); diff --git a/paddle/fluid/operators/generator/get_expected_kernel_func.cc b/paddle/fluid/operators/generator/get_expected_kernel_func.cc index a0f4f7a7e22fa..d9276c5eb9d62 100644 --- a/paddle/fluid/operators/generator/get_expected_kernel_func.cc +++ 
b/paddle/fluid/operators/generator/get_expected_kernel_func.cc @@ -105,8 +105,8 @@ phi::KernelKey GetConcatExpectedKernelType( op_ptr->SetDnnFallback(true); } if (flag == 0) { - PADDLE_THROW(platform::errors::InvalidArgument( - "All Inputs of Concat OP are Empty!")); + PADDLE_THROW( + phi::errors::InvalidArgument("All Inputs of Concat OP are Empty!")); } return phi::KernelKey(input_data_type, ctx.GetPlace()); } @@ -128,7 +128,7 @@ phi::KernelKey GetReduceExpectedKernelType( platform::is_xpu_place(ctx.GetPlace()) || platform::is_custom_place(ctx.GetPlace()), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "float16 can only be used on GPU or NPU or XPU place")); } return phi::KernelKey(input_data_type, ctx.GetPlace()); @@ -236,7 +236,7 @@ phi::KernelKey GetSoftmaxExpectedKernelType( platform::is_xpu_place(ctx.GetPlace()) || platform::is_custom_place(ctx.GetPlace()), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "float16 can only be used on GPU/XPU and custom place")); } return phi::KernelKey( @@ -255,7 +255,7 @@ phi::KernelKey GetSoftmaxGradExpectedKernelType( if (!(platform::is_gpu_place(ctx.GetPlace()) || platform::is_xpu_place(ctx.GetPlace()) || platform::is_custom_place(ctx.GetPlace()))) - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "float16 can only be used on GPU/XPU and custom place")); } return phi::KernelKey( @@ -275,7 +275,7 @@ phi::KernelKey GetStridedSliceExpectedKernelType( platform::is_same_place(tensor.place(), ctx.device_context().GetPlace()), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Place of context is %s. Place of input tensor is %s. 
They " "are should be same, but reveived different place.", string::to_string(ctx.device_context().GetPlace()), @@ -375,18 +375,18 @@ phi::KernelKey GetInstanceNormExpectedKernelType( in_param_type = framework::proto::VarType::FP64; } if (ctx.HasInput("Scale")) { - PADDLE_ENFORCE_EQ(in_param_type, - framework::TransToProtoVarType( - ctx.Input("Scale")->dtype()), - platform::errors::InvalidArgument( - "Scale input should be of float type")); + PADDLE_ENFORCE_EQ( + in_param_type, + framework::TransToProtoVarType( + ctx.Input("Scale")->dtype()), + phi::errors::InvalidArgument("Scale input should be of float type")); } if (ctx.HasInput("Bias")) { - PADDLE_ENFORCE_EQ(in_param_type, - framework::TransToProtoVarType( - ctx.Input("Bias")->dtype()), - platform::errors::InvalidArgument( - "Bias input should be of float type")); + PADDLE_ENFORCE_EQ( + in_param_type, + framework::TransToProtoVarType( + ctx.Input("Bias")->dtype()), + phi::errors::InvalidArgument("Bias input should be of float type")); } return phi::KernelKey(input_data_type, ctx.GetPlace()); @@ -423,7 +423,7 @@ phi::KernelKey GetConvExpectedKernelType( PADDLE_ENFORCE_EQ( input_data_type, filter_data_type, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "input and filter data type should be consistent, " "but received input data type is %s and filter type " "is %s", diff --git a/paddle/fluid/operators/generator/templates/operator_utils.c.j2 b/paddle/fluid/operators/generator/templates/operator_utils.c.j2 index 068704e6d0687..c42032a45cdcd 100644 --- a/paddle/fluid/operators/generator/templates/operator_utils.c.j2 +++ b/paddle/fluid/operators/generator/templates/operator_utils.c.j2 @@ -811,7 +811,7 @@ class {{op_name | to_composite_grad_opmaker_name}} : public prim::CompositeGradO {% if "tensor_name" in attr_dict[attrs[i]] %} auto {{'tensor_' + attrs[i]}} = this->GetOptionalSingleForwardInput("{{attr_dict[attrs[i]]['tensor_name']}}"); if ({{'tensor_' + attrs[i]}}) { - 
PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "We don't support dynamic tensor attribute {{attr_dict[attrs[i]]['tensor_name']}} for {{op_name}} composite" "for now. ")); } @@ -819,7 +819,7 @@ class {{op_name | to_composite_grad_opmaker_name}} : public prim::CompositeGradO {% if "tensors_name" in attr_dict[attrs[i]] %} auto {{'tensors_' + attrs[i]}} = this->GetOptionalMultiForwardInput("{{attr_dict[attrs[i]]['tensors_name']}}"); if ({{'tensors_' + attrs[i]}}) { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "We don't support dynamic tensors attribute {{attr_dict[attrs[i]]['tensor_name']}} for {{op_name}} composite " "for now. ")); } diff --git a/paddle/fluid/operators/get_tensor_from_selected_rows_op.cc b/paddle/fluid/operators/get_tensor_from_selected_rows_op.cc index 8ae92b04b7df4..d11b445f3a9b8 100644 --- a/paddle/fluid/operators/get_tensor_from_selected_rows_op.cc +++ b/paddle/fluid/operators/get_tensor_from_selected_rows_op.cc @@ -30,7 +30,7 @@ class GetTensorFromSelectedRowsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("X").front(), framework::proto::VarType::SELECTED_ROWS, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input X(%s)'s type should be SelectedRows, " "but the received is %s", ctx->Inputs("X").front(), @@ -38,7 +38,7 @@ class GetTensorFromSelectedRowsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->GetOutputsVarType("Out").front(), framework::proto::VarType::LOD_TENSOR, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The output Out(%s)'s type should be phi::DenseTensor, " "but the received is %s", ctx->Outputs("Out").front(), diff --git a/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc b/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc index c88d36602bd79..6fdd6d380a7fe 100644 --- a/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc +++ 
b/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc @@ -38,7 +38,7 @@ class CUDNNGridSampleOpKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "It must use CUDAPlace when using CUDA Kernel")); auto& dev_ctx = ctx.template device_context(); auto handle = dev_ctx.cudnn_handle(); @@ -87,7 +87,7 @@ class CUDNNGridSampleGradOpKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "It must use CUDAPlace when using CUDA Kernel")); auto& dev_ctx = ctx.template device_context(); auto handle = dev_ctx.cudnn_handle(); diff --git a/paddle/fluid/operators/gru_op.cc b/paddle/fluid/operators/gru_op.cc index e23b3c6c42d5f..c948315189a15 100644 --- a/paddle/fluid/operators/gru_op.cc +++ b/paddle/fluid/operators/gru_op.cc @@ -52,7 +52,7 @@ class GRUOp : public framework::OperatorWithKernel { if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ(input_size, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Input) must be 3 " "times of frame_size in GRUOp, but received %d " "(Input) vs %d (frame_size).", @@ -62,7 +62,7 @@ class GRUOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( weight_dims[1], frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Input(Weight) matrix must be [frame_size, frame_size " "* 3], but received [%d, %d] (Weight) vs [%d, %d] (frame_size).", weight_dims[0], @@ -74,7 +74,7 @@ class GRUOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( h0_dims[1], frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The width of Input(H0) must be equal to 
frame_size, but " "received %d (width of H0) vs %d (frame_size).", h0_dims[1], @@ -87,7 +87,7 @@ class GRUOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( bias_height, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Bias must be [1, frame_size * 3], but received " "[%d, %d] (Bias) vs [1, %d] (frame_size * 3).", bias_height, @@ -96,7 +96,7 @@ class GRUOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( bias_width, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Bias must be [1, frame_size * 3], but received " "[%d, %d] (Bias) vs [1, %d] (frame_size * 3).", bias_height, @@ -233,7 +233,7 @@ class GRUGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( input_size, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Input) must be 3 times of " "frame_size in GRUOp, but received %d (Input) vs %d (frame_size).", input_size, @@ -241,7 +241,7 @@ class GRUGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( weight_height, frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Input(Weight) matrix must be [frame_size, frame_size " "* 3], but received [%d, %d] (Weight) vs [%d, %d] (frame_size).", weight_height, @@ -251,7 +251,7 @@ class GRUGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( weight_width, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Input(Weight) matrix must be [frame_size, frame_size " "* 3], but received [%d, %d] (Weight) vs [%d, %d] (frame_size).", weight_height, @@ -263,7 +263,7 @@ class GRUGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( h0_dims[1], frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The width of Input(H0) must be equal to frame_size, but " "received %d (width of H0) vs %d 
(frame_size).", h0_dims[1], @@ -279,7 +279,7 @@ class GRUGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( bias_height, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Bias must be [1, frame_size * 3], but received " "[%d, %d] (Bias) vs [1, %d] (frame_size * 3).", bias_height, @@ -288,7 +288,7 @@ class GRUGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( bias_width, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Bias must be [1, frame_size * 3], but received " "[%d, %d] (Bias) vs [1, %d] (frame_size * 3).", bias_height, @@ -406,7 +406,7 @@ class GRUCPUKernel : public framework::OpKernel { frame_size /*height of height*/); PADDLE_ENFORCE_NOT_NULL( packed_gate, - platform::errors::NotFound( + phi::errors::NotFound( "The calculation result of packed_gate by " "GEMM_ALLOC should not be null when using MKL.")); blas.GEMM_PACK(CblasBMatrix, @@ -424,7 +424,7 @@ class GRUCPUKernel : public framework::OpKernel { frame_size /*height of height*/); PADDLE_ENFORCE_NOT_NULL( packed_state, - platform::errors::NotFound( + phi::errors::NotFound( "The calculation result of packed_state by " "GEMM_ALLOC should not be null when using MKL.")); blas.GEMM_PACK(CblasBMatrix, diff --git a/paddle/fluid/operators/gru_unit_op.cc b/paddle/fluid/operators/gru_unit_op.cc index b217d58e6d5da..5a29abda1f369 100644 --- a/paddle/fluid/operators/gru_unit_op.cc +++ b/paddle/fluid/operators/gru_unit_op.cc @@ -45,7 +45,7 @@ class GRUUnitOp : public framework::OperatorWithKernel { if (ctx->IsRuntime() || input_size >= 0) { PADDLE_ENFORCE_EQ(input_size, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Input) must be 3 " "times of frame_size in GRUUnitOp, but received %d " "(Input) vs %d (frame_size).", @@ -55,7 +55,7 @@ class GRUUnitOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( weight_height, 
frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Input(Weight) matrix must be [frame_size, frame_size " "* 3] in GRUUnitOp, but received [%d, %d] (Weight) vs [%d, %d] " "(frame_size).", @@ -66,7 +66,7 @@ class GRUUnitOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( weight_width, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Input(Weight) matrix must be [frame_size, frame_size " "* 3] in GRUUnitOp, but received [%d, %d] (Weight) vs [%d, %d] " "(frame_size).", @@ -82,7 +82,7 @@ class GRUUnitOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( bias_height, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Bias must be [1, frame_size * 3], but received " "[%d, %d] (Bias) vs [1, %d] (frame_size * 3).", bias_height, @@ -91,7 +91,7 @@ class GRUUnitOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( bias_width, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Bias must be [1, frame_size * 3], but received " "[%d, %d] (Bias) vs [1, %d] (frame_size * 3).", bias_height, @@ -203,7 +203,7 @@ class GRUUnitGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( input_size, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Input) must be 3 " "times of frame_size in GRUUnitGradOp, but received %d " "(Input) vs %d (frame_size).", @@ -213,7 +213,7 @@ class GRUUnitGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( weight_height, frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Input(Weight) matrix must be [frame_size, frame_size " "* 3] in GRUUnitGradOp, but received [%d, %d] (Weight) vs [%d, %d] " "(frame_size).", @@ -224,7 +224,7 @@ class GRUUnitGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( weight_width, 
frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Input(Weight) matrix must be [frame_size, frame_size " "* 3] in GRUUnitGradOp, but received [%d, %d] (Weight) vs [%d, %d] " "(frame_size).", @@ -240,7 +240,7 @@ class GRUUnitGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( bias_height, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Bias must be [1, frame_size * 3], but received " "[%d, %d] (Bias) vs [1, %d] (frame_size * 3).", bias_height, @@ -249,7 +249,7 @@ class GRUUnitGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( bias_width, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Bias must be [1, frame_size * 3], but received " "[%d, %d] (Bias) vs [1, %d] (frame_size * 3).", bias_height, diff --git a/paddle/fluid/operators/gru_unit_op.h b/paddle/fluid/operators/gru_unit_op.h index 933176433e2d7..fa774e2bef3c2 100644 --- a/paddle/fluid/operators/gru_unit_op.h +++ b/paddle/fluid/operators/gru_unit_op.h @@ -46,7 +46,7 @@ class GRUUnitKernel : public framework::OpKernel { else ReluCUDAFunctor()(d, x, y); } else { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unsupported activation type, only supports identity, sigmoid, tanh " "and relu.")); } @@ -169,7 +169,7 @@ class GRUUnitGradKernel : public framework::OpKernel { else if (act_type == relu) ReluGradFunctor()(d, x, y, dy, dx); else - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unsupported activation type, only supports identity, sigmoid, tanh " "and relu.")); } diff --git a/paddle/fluid/operators/hash_op.cc b/paddle/fluid/operators/hash_op.cc index 03887561934b7..002a98f3538e4 100644 --- a/paddle/fluid/operators/hash_op.cc +++ b/paddle/fluid/operators/hash_op.cc @@ -46,7 +46,7 @@ class HashOp : public framework::OperatorWithKernel { auto dims = 
ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ(dims.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input of hash_op's dimensions must be 2")); std::vector out_dims; int num_hash = ctx->Attrs().Get("num_hash"); diff --git a/paddle/fluid/operators/hinge_loss_op.cc b/paddle/fluid/operators/hinge_loss_op.cc index dea3ce3fe695b..dcf16cf104cc8 100644 --- a/paddle/fluid/operators/hinge_loss_op.cc +++ b/paddle/fluid/operators/hinge_loss_op.cc @@ -35,7 +35,7 @@ class HingeLossOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( pred_dims, label_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(input) and Input(label) should have the same " "shape, but received input shape [%s] != label shape [%s]", pred_dims, @@ -44,13 +44,13 @@ class HingeLossOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( pred_dims.size(), 2, - platform::errors::InvalidArgument("Input(input) rank should be 2, " - "but received input rank(%d) != 2", - pred_dims.size())); + phi::errors::InvalidArgument("Input(input) rank should be 2, " + "but received input rank(%d) != 2", + pred_dims.size())); PADDLE_ENFORCE_EQ(pred_dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(input) should be 1, " "as each row of input contains a real value, " "but received second dimension of input (%d) != 1", @@ -112,7 +112,7 @@ class HingeLossGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(loss_grad_dims, pred_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of loss gradient should be the same as " "the shape of Input(input), but received the loss " "gradient shape [%s] != input shape [%s]", diff --git a/paddle/fluid/operators/im2sequence_op.cc b/paddle/fluid/operators/im2sequence_op.cc index 0486dd12c4519..d11734c1a6c99 100644 --- a/paddle/fluid/operators/im2sequence_op.cc +++ b/paddle/fluid/operators/im2sequence_op.cc @@ 
-27,19 +27,19 @@ class Im2SequenceOp : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - platform::errors::NotFound( - "The input 'X' of Im2SequenceOp is not found.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("X"), + true, + phi::errors::NotFound("The input 'X' of Im2SequenceOp is not found.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, - platform::errors::NotFound( + phi::errors::NotFound( "The output 'Out' of Im2SequenceOp is not found.")); auto in_dim = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ(in_dim.size(), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions size of input 'X' in Im2SequenceOp " "should be 4. But " "received dimensions size=[%d], dimensions=[%s].", @@ -159,13 +159,13 @@ class Im2SequenceGradOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - platform::errors::NotFound( + phi::errors::NotFound( "The input 'X' of Im2SequenceGradOp is not found.")); - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), - true, - platform::errors::NotFound( - "The input %s of Im2SequenceGradOp is not found.", - framework::GradVarName("Out"))); + PADDLE_ENFORCE_EQ( + ctx->HasInput(framework::GradVarName("Out")), + true, + phi::errors::NotFound("The input %s of Im2SequenceGradOp is not found.", + framework::GradVarName("Out"))); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); } }; diff --git a/paddle/fluid/operators/index_impl.cu.h b/paddle/fluid/operators/index_impl.cu.h index 629717f61933a..364eb9d574036 100644 --- a/paddle/fluid/operators/index_impl.cu.h +++ b/paddle/fluid/operators/index_impl.cu.h @@ -89,7 +89,7 @@ void IndexKernel(const KPDevice &dev_ctx, Tensor *out, Functor func) { <<>>(out_data, numel, main_offset, func); break; default: { - 
PADDLE_THROW(paddle::platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unsupported vectorized size: %d !", vec_size)); break; } diff --git a/paddle/fluid/operators/index_select_op.h b/paddle/fluid/operators/index_select_op.h index c06885633f348..8ac3fee1d0452 100644 --- a/paddle/fluid/operators/index_select_op.h +++ b/paddle/fluid/operators/index_select_op.h @@ -59,7 +59,7 @@ void IndexSelectInner(const framework::ExecutionContext& context, PADDLE_ENFORCE_GE( index_data[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (index) of OP(index_select) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -68,7 +68,7 @@ void IndexSelectInner(const framework::ExecutionContext& context, PADDLE_ENFORCE_LT( index_data[i], input_dim[dim], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (index) of OP(index_select) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", diff --git a/paddle/fluid/operators/interpolate_op.cc b/paddle/fluid/operators/interpolate_op.cc index 4d7730d687d8d..21cd6ad3e084a 100644 --- a/paddle/fluid/operators/interpolate_op.cc +++ b/paddle/fluid/operators/interpolate_op.cc @@ -17,7 +17,7 @@ #include "paddle/fluid/framework/op_registry.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif namespace paddle { @@ -31,7 +31,7 @@ static void Interpolate1DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ("linear", interp_method, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Interpolation method can only be \"linear\" when" "Input(X) dimension is 3, but got method = %s .", interp_method)); @@ -44,7 +44,7 @@ static void Interpolate1DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( inputs_name.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( 
"Input(SizeTensor)'size of Op(interpolate) must be 1. " "Attr(out_shape)'s length must be 1 for 3-D input tensor, but got " "size = %d .", @@ -67,7 +67,7 @@ static void Interpolate1DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( scale_tensor.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Scale's dimension size must be 1, but got dimension = %d .", scale_tensor.size())); out_w = -1; @@ -90,13 +90,13 @@ static void Interpolate1DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( out_size_dim.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "OutSize's dimension size must be 1, but got dimension = %d .", out_size_dim.size())); PADDLE_ENFORCE_EQ( out_size_dim[0], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "OutSize's 0-th dimension's value must be 1, but got value = %d .", out_size_dim[0])); ctx->ShareLoD("X", "Out"); @@ -119,7 +119,7 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ("bilinear" == interp_method || "nearest" == interp_method || "bicubic" == interp_method, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Interpolation method can only be \"bilinear\" " "or \"nearest\" or \"bicubic\" when " "Input(X) dimension is 4, but got method is %s.", @@ -133,7 +133,7 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( inputs_name.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(SizeTensor)'size of Op(interpolate) must be 2. 
" "Attr(out_shape)'s length must be 2 for 4-D input " "tensor, but got size = %d .", @@ -157,7 +157,7 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( scale_tensor.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Scale's dimension size must be 1, but got dimension = %d .", scale_tensor.size())); out_h = -1; @@ -186,13 +186,13 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( out_size_dim.size(), 1, - platform::errors::InvalidArgument("OutSize's dimension size must be 1, " - "but got dimension size is %d .", - out_size_dim.size())); + phi::errors::InvalidArgument("OutSize's dimension size must be 1, " + "but got dimension size is %d .", + out_size_dim.size())); PADDLE_ENFORCE_EQ( out_size_dim[0], 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "OutSize's dimension[0] must be 2, but got dimension[0] is %d .", out_size_dim[0])); ctx->ShareLoD("X", "Out"); @@ -215,7 +215,7 @@ static void Interpolate3DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( "trilinear", interp_method, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Interpolation method can only be \"trilinear\" when Input(X) " "dimension is 5, but got method = %s .", interp_method)); @@ -228,7 +228,7 @@ static void Interpolate3DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( inputs_name.size(), 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(SizeTensor)'s size of Op(interpolate) must be 3. 
" "Attr(out_shape)'s length must be 3 for 5-D input " "tensor, but got size = %d .", @@ -253,7 +253,7 @@ static void Interpolate3DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( scale_tensor.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Scale's dimension size must be 1, but got size = %d .", scale_tensor.size())); out_d = -1; @@ -288,12 +288,12 @@ static void Interpolate3DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( out_size_dim.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "OutSize's dimension size must be 1, but got size is %d.", out_size_dim.size())); PADDLE_ENFORCE_EQ(out_size_dim[0], 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "OutSize's dim[0] must be 3, but got size is %d.", out_size_dim[0])); ctx->ShareLoD("X", "Out"); @@ -321,7 +321,7 @@ class InterpolateOp : public framework::OperatorWithKernel { auto dim_x = ctx->GetInputDim("X"); // NCHW format PADDLE_ENFORCE( dim_x.size() == 3 || dim_x.size() == 4 || dim_x.size() == 5, - platform::errors::Unimplemented( + phi::errors::Unimplemented( "Input(X) dimension must be 3, 4 or 5, but got dimension = %d .", dim_x.size())); if (dim_x.size() == 3) { diff --git a/paddle/fluid/operators/interpolate_op.cu b/paddle/fluid/operators/interpolate_op.cu index bfbb15b076448..8a71b6d96a055 100644 --- a/paddle/fluid/operators/interpolate_op.cu +++ b/paddle/fluid/operators/interpolate_op.cu @@ -953,7 +953,7 @@ static void Interpolate1DCUDAFwd(const framework::ExecutionContext& ctx, } PADDLE_ENFORCE_GT(out_w, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_w in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); framework::DDim dim_out; @@ -1049,12 +1049,12 @@ static void Interpolate2DCUDAFwd(const framework::ExecutionContext& ctx, } PADDLE_ENFORCE_GT(out_h, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_h in 
Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); PADDLE_ENFORCE_GT(out_w, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_w in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); @@ -1205,17 +1205,17 @@ static void Interpolate3DCUDAFwd(const framework::ExecutionContext& ctx, } PADDLE_ENFORCE_GT(out_d, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_d in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); PADDLE_ENFORCE_GT(out_h, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_h in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); PADDLE_ENFORCE_GT(out_w, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_w in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); @@ -1648,7 +1648,7 @@ class InterpolateOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::NotFound("This kernel only runs on GPU device.")); + phi::errors::NotFound("This kernel only runs on GPU device.")); auto* input = ctx.Input("X"); auto* output = ctx.Output("Out"); @@ -1670,7 +1670,7 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::NotFound("This kernel only runs on GPU device.")); + phi::errors::NotFound("This kernel only runs on GPU device.")); auto* input_grad = ctx.Output(framework::GradVarName("X")); auto* output_grad = diff --git a/paddle/fluid/operators/interpolate_op.h b/paddle/fluid/operators/interpolate_op.h index 563879e301d12..793b5fa629ee1 100644 --- a/paddle/fluid/operators/interpolate_op.h +++ b/paddle/fluid/operators/interpolate_op.h @@ -37,7 +37,7 @@ inline std::vector get_new_shape( auto tensor = list_new_shape_tensor[i]; PADDLE_ENFORCE_EQ(tensor->dims(), common::make_ddim({1}), - 
platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of dimension tensor should be [1]," "but received d%.", tensor->dims())); @@ -890,7 +890,7 @@ static void Interpolate1DCPUFwd(const framework::ExecutionContext& ctx, } PADDLE_ENFORCE_GT(out_w, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_w in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); framework::DDim dim_out; @@ -969,12 +969,12 @@ static void Interpolate2DCPUFwd(const framework::ExecutionContext& ctx, } PADDLE_ENFORCE_GT(out_h, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_h in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); PADDLE_ENFORCE_GT(out_w, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_w in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); framework::DDim dim_out; @@ -1090,17 +1090,17 @@ static void Interpolate3DCPUFwd(const framework::ExecutionContext& ctx, } PADDLE_ENFORCE_GT(out_d, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_d in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); PADDLE_ENFORCE_GT(out_h, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_h in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); PADDLE_ENFORCE_GT(out_w, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_w in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); diff --git a/paddle/fluid/operators/isfinite_op.cc b/paddle/fluid/operators/isfinite_op.cc index 710cdaeb707b6..39a2b31fa6925 100644 --- a/paddle/fluid/operators/isfinite_op.cc +++ b/paddle/fluid/operators/isfinite_op.cc @@ -61,7 +61,7 @@ class OverflowOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( true, false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input type mismatch, the type of Input(X) must be Tensor 
or " "SelectedRows, please check your input.")); } diff --git a/paddle/fluid/operators/isfinite_op.cu b/paddle/fluid/operators/isfinite_op.cu index 300229cbeca66..71aaa66a5ad0d 100755 --- a/paddle/fluid/operators/isfinite_op.cu +++ b/paddle/fluid/operators/isfinite_op.cu @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/fluid/operators/isfinite_op.h" -#include "paddle/fluid/platform/bfloat16.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/float16.h" namespace ops = paddle::operators; namespace plat = paddle::platform; @@ -23,7 +23,9 @@ REGISTER_OP_CUDA_KERNEL( ops::OverflowKernel, ops::OverflowKernel, ops::OverflowKernel, - ops::OverflowKernel, + ops::OverflowKernel, ops::OverflowKernel); REGISTER_OP_CUDA_KERNEL( @@ -31,5 +33,5 @@ REGISTER_OP_CUDA_KERNEL( ops::OverflowKernel, ops::OverflowKernel, ops::OverflowKernel, - ops::OverflowKernel, + ops::OverflowKernel, ops::OverflowKernel); diff --git a/paddle/fluid/operators/isfinite_op.h b/paddle/fluid/operators/isfinite_op.h index 5352ccc99df92..0eb6243a31873 100644 --- a/paddle/fluid/operators/isfinite_op.h +++ b/paddle/fluid/operators/isfinite_op.h @@ -20,7 +20,7 @@ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/common/transform.h" #include "paddle/phi/kernels/isfinite_kernel.h" #include "paddle/phi/kernels/reduce_all_kernel.h" @@ -89,7 +89,7 @@ inline void TensorContainsNAN(const phi::DenseTensor& tensor, return; } #endif - PADDLE_THROW(platform::errors::Unimplemented("Not supported on %s.", place)); + PADDLE_THROW(phi::errors::Unimplemented("Not supported on %s.", place)); } inline void TensorContainsInf(const phi::DenseTensor& tensor, phi::DenseTensor* out) { @@ 
-106,7 +106,7 @@ inline void TensorContainsInf(const phi::DenseTensor& tensor, return; } #endif - PADDLE_THROW(platform::errors::Unimplemented("Not supported on %s.", place)); + PADDLE_THROW(phi::errors::Unimplemented("Not supported on %s.", place)); } inline void TensorIsfinite(const phi::DenseTensor& tensor, phi::DenseTensor* out) { @@ -123,7 +123,7 @@ inline void TensorIsfinite(const phi::DenseTensor& tensor, return; } #endif - PADDLE_THROW(platform::errors::Unimplemented("Not supported on %s.", place)); + PADDLE_THROW(phi::errors::Unimplemented("Not supported on %s.", place)); } // copy the result bool to cpu @@ -173,7 +173,7 @@ class OverflowKernel : public framework::OpKernel { } else { PADDLE_ENFORCE_EQ(true, false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input type mismatch, the type of Input(X) " "must be phi::DenseTensor or " "SelectedRows, please check your input.")); diff --git a/paddle/fluid/operators/l1_norm_op.h b/paddle/fluid/operators/l1_norm_op.h index c268a6c51fbc5..3cfcf1959a387 100644 --- a/paddle/fluid/operators/l1_norm_op.h +++ b/paddle/fluid/operators/l1_norm_op.h @@ -15,7 +15,7 @@ limitations under the License. 
*/ #pragma once #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" +#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" namespace paddle { namespace operators { @@ -34,7 +34,8 @@ class L1NormKernel : public framework::OpKernel { auto &place = *context.template device_context().eigen_device(); - EigenL1Norm, T>::Eval(place, out, x); + phi::funcs::EigenL1Norm, T>::Eval( + place, out, x); } }; @@ -49,7 +50,7 @@ class L1NormGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( d_out->numel(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(GRAD@Out) of L1NormGradOP should be a scalar.")); phi::DenseTensor *dx = context.Output(framework::GradVarName("X")); @@ -62,7 +63,7 @@ class L1NormGradKernel : public framework::OpKernel { *context.template device_context().eigen_device(); Eigen::DSizes x_dsize(x->numel()); - EigenL1NormGrad, T>::Eval( + phi::funcs::EigenL1NormGrad, T>::Eval( place, dx_eigen, d_out_eigen, x_eigen, x_dsize); } }; diff --git a/paddle/fluid/operators/limit_by_capacity_op.cc b/paddle/fluid/operators/limit_by_capacity_op.cc index 387e30ae647c9..77c29a4cef9f1 100644 --- a/paddle/fluid/operators/limit_by_capacity_op.cc +++ b/paddle/fluid/operators/limit_by_capacity_op.cc @@ -52,14 +52,14 @@ class LimitByCapacityOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( expert_count_dtype, capacity_dtype, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dtype of the expert_count and capacity should be same")); PADDLE_ENFORCE_EQ( expert_count_dtype, framework::proto::VarType::INT64, - platform::errors::InvalidArgument("The dtype of the expert_count and " - "capacity should be same as int64")); + phi::errors::InvalidArgument("The dtype of the expert_count and " + "capacity should be same as int64")); return phi::KernelKey(expert_count_dtype, ctx.GetPlace()); } }; diff --git 
a/paddle/fluid/operators/linear_chain_crf_op.cc b/paddle/fluid/operators/linear_chain_crf_op.cc index e017e43d7db2d..a27863819fedd 100644 --- a/paddle/fluid/operators/linear_chain_crf_op.cc +++ b/paddle/fluid/operators/linear_chain_crf_op.cc @@ -167,7 +167,7 @@ class LinearChainCRFOp : public framework::OperatorWithKernel { auto transition_dims = ctx->GetInputDim("Transition"); PADDLE_ENFORCE_EQ(transition_dims.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Transition) should be a 2-D tensor. But " "received: input rank %u, input shape [%s].", transition_dims.size(), @@ -181,7 +181,7 @@ class LinearChainCRFOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( transition_dims[0] - 2, transition_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "An invalid dimension for the Input(Transition), which should " "be a 2-D tensor with shape [(D + 2) x D]. But received: input " "rank %u, " @@ -193,7 +193,7 @@ class LinearChainCRFOp : public framework::OperatorWithKernel { if (ctx->HasInput("Length")) { PADDLE_ENFORCE_EQ(emission_dims.size(), 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Emission) should be a 3-D tensor. But " "received: input rank %u, input shape [%s].", emission_dims.size(), @@ -203,7 +203,7 @@ class LinearChainCRFOp : public framework::OperatorWithKernel { (label_dims.size() == 3UL && label_dims[2] == 1) || (label_dims.size() == 2UL), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Label) should be a 3-D tensor with last dimension " "fixed to 1 or a 2-D tensor in padding mode. 
But received: input " "rank %u, input shape [%s].", @@ -212,7 +212,7 @@ class LinearChainCRFOp : public framework::OperatorWithKernel { if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ(emission_dims[0], label_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The batch size of Input(Emission) " "and Input(Label) should be the same. But " "received Input(Emission): " @@ -224,7 +224,7 @@ class LinearChainCRFOp : public framework::OperatorWithKernel { label_dims)); PADDLE_ENFORCE_EQ(emission_dims[1], label_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The max length of Input(Emission) " "and Input(Label) should be the same. But " "received Input(Emission): " @@ -239,7 +239,7 @@ class LinearChainCRFOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( emission_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Emission) should be a 2-D tensor. But received: " "input rank %u, input shape [%s].", emission_dims.size(), @@ -247,7 +247,7 @@ class LinearChainCRFOp : public framework::OperatorWithKernel { if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ(emission_dims[1], transition_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 2nd dimension of the Input(Emission) and " "the Input(Transition) " "should be equal to the tag number. But received " @@ -264,7 +264,7 @@ class LinearChainCRFOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( label_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Label) should be a 2-D tensor with the 2nd " "dimensions fixed to 1. 
But received: input rank %u, " "input shape [%s].", @@ -274,7 +274,7 @@ class LinearChainCRFOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( emission_dims[0], label_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(Emission) and Input(Label) " "should be the same. But received Input(Emission): rank %u, " "shape " diff --git a/paddle/fluid/operators/linear_chain_crf_op.h b/paddle/fluid/operators/linear_chain_crf_op.h index 2891320506391..01ed8463701e7 100644 --- a/paddle/fluid/operators/linear_chain_crf_op.h +++ b/paddle/fluid/operators/linear_chain_crf_op.h @@ -30,7 +30,7 @@ static inline T NormalizeL1(T* x, size_t len) { PADDLE_ENFORCE_GT( sum, 0., - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The unnormalized probabilities of all possible unfinished " "sequences must be greater than 0.")); T s = 1. / sum; @@ -89,7 +89,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( seq_num, emission_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "the size of Input(length) must be equal to " "emission_dims[0]. But input_size = %d, emission_dims[0] = %d.", seq_num, @@ -98,7 +98,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( seq_num, label_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "the size of Input(length) must be equal to " "label_dims[0]. 
But input_size = %d, label_dims[0] = %d.", seq_num, @@ -116,10 +116,10 @@ class LinearChainCRFOpKernel : public framework::OpKernel { ctx.device_context(), alpha, static_cast(0.0)); } else { in_lod = ctx.Input("Label")->lod(); - PADDLE_ENFORCE_NE(in_lod.size(), - 0, - platform::errors::InvalidArgument( - "Input(Label) must be a sequence.")); + PADDLE_ENFORCE_NE( + in_lod.size(), + 0, + phi::errors::InvalidArgument("Input(Label) must be a sequence.")); seq_num = in_lod[0].size() - 1; batch_size = emission_dims[0]; tag_num = emission_dims[1]; @@ -233,7 +233,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel { PADDLE_ENFORCE_LT( static_cast(*std::max_element(lbl, lbl + seq_length)), tag_num, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "An invalid tag label that excesses the largest tag number.")); // Calculate the nominator part, which depends on the label sequence. @@ -288,10 +288,10 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { {emission_dims[0] * emission_dims[1], emission_dims[2]}); } else { in_lod = ctx.Input("Label")->lod(); - PADDLE_ENFORCE_NE(in_lod.size(), - 0, - platform::errors::InvalidArgument( - "Input(Label) must be a sequence.")); + PADDLE_ENFORCE_NE( + in_lod.size(), + 0, + phi::errors::InvalidArgument("Input(Label) must be a sequence.")); seq_num = static_cast(in_lod[0].size() - 1); } diff --git a/paddle/fluid/operators/lite/ut_helper.h b/paddle/fluid/operators/lite/ut_helper.h index 3d574b1f844c8..ba55b7066da1e 100644 --- a/paddle/fluid/operators/lite/ut_helper.h +++ b/paddle/fluid/operators/lite/ut_helper.h @@ -60,8 +60,8 @@ void serialize_params(std::string* str, for (const auto& param : params) { PADDLE_ENFORCE_NOT_NULL( scope->FindVar(param), - platform::errors::NotFound("Block should already have a '%s' variable", - param)); + phi::errors::NotFound("Block should already have a '%s' variable", + param)); auto* tensor = scope->FindVar(param)->GetMutable(); framework::SerializeToStream(os, 
*tensor, ctx); } @@ -81,7 +81,7 @@ void RandomizeTensor(phi::DenseTensor* tensor, const platform::Place& place) { size_t num_elements = analysis::AccuDims(dims, dims.size()); PADDLE_ENFORCE_GT(num_elements, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor dimension of the randomized tensor " "function should be greater than zero.")); platform::CPUPlace cpu_place; diff --git a/paddle/fluid/operators/load_combine_op.h b/paddle/fluid/operators/load_combine_op.h index 4641c39111fad..be94eab242491 100644 --- a/paddle/fluid/operators/load_combine_op.h +++ b/paddle/fluid/operators/load_combine_op.h @@ -40,7 +40,7 @@ class LoadCombineOpKernel : public framework::OpKernel { PADDLE_ENFORCE_GT(out_var_names.size(), 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of variables to be loaded is %d, expect " "it to be greater than 0.", out_var_names.size())); @@ -49,7 +49,7 @@ class LoadCombineOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( static_cast(fin), true, - platform::errors::Unavailable( + phi::errors::Unavailable( "LoadCombine operator fails to open file %s, please check " "whether the model file is complete or damaged.", filename)); @@ -58,7 +58,7 @@ class LoadCombineOpKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( filename.empty(), true, - platform::errors::Unavailable( + phi::errors::Unavailable( "LoadCombine operator fails to open file %s, please check " "whether the model file is complete or damaged.", filename)); @@ -81,14 +81,14 @@ class LoadCombineOpKernel : public framework::OpKernel { VLOG(4) << "loading tensor: " << out_var_names[i]; PADDLE_ENFORCE_NOT_NULL( out_vars[i], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The variable %s to be loaded cannot be found.", out_var_names[i])); // Error checking PADDLE_ENFORCE_EQ( static_cast(*buffer), true, - platform::errors::Unavailable( + phi::errors::Unavailable( "An error occurred while loading 
model parameters. " "Please check whether the model file is complete or damaged.")); if (out_vars[i]->IsType()) { @@ -142,7 +142,7 @@ class LoadCombineOpKernel : public framework::OpKernel { buffer->peek(); PADDLE_ENFORCE_EQ(buffer->eof(), true, - platform::errors::Unavailable( + phi::errors::Unavailable( "Not allowed to load partial data via " "load_combine_op, please use load_op instead.")); } diff --git a/paddle/fluid/operators/lod_rank_table_op.cc b/paddle/fluid/operators/lod_rank_table_op.cc index a399ad4527ff8..310fb619bcb01 100644 --- a/paddle/fluid/operators/lod_rank_table_op.cc +++ b/paddle/fluid/operators/lod_rank_table_op.cc @@ -72,10 +72,9 @@ output operators. class LoDRankTableInferShape : public framework::InferShapeBase { public: void operator()(framework::InferShapeContext *context) const override { - PADDLE_ENFORCE_EQ( - context->HasInput("X"), - true, - platform::errors::NotFound("LoDRankTable must have input X.")); + PADDLE_ENFORCE_EQ(context->HasInput("X"), + true, + phi::errors::NotFound("LoDRankTable must have input X.")); } }; diff --git a/paddle/fluid/operators/lod_reset_op.cc b/paddle/fluid/operators/lod_reset_op.cc index ae464e7b47161..654bc669c7504 100644 --- a/paddle/fluid/operators/lod_reset_op.cc +++ b/paddle/fluid/operators/lod_reset_op.cc @@ -33,7 +33,7 @@ class LoDResetOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( static_cast(level0.size()), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "If Input(Y) is not provided, the output's LoD should be " "specified by attribute 'target_lod'. 
But the size of " "'target_lod' is 0.")); @@ -252,7 +252,7 @@ PD_REGISTER_STRUCT_KERNEL(lod_reset, CPU, ALL_LAYOUT, ops::LoDResetKernel, - plat::float16, + phi::dtype::float16, float, double, int, @@ -263,7 +263,7 @@ PD_REGISTER_STRUCT_KERNEL(lod_reset, XPU, ALL_LAYOUT, ops::LoDResetKernel, - plat::float16, + phi::dtype::float16, float, double, int, @@ -274,7 +274,7 @@ PD_REGISTER_STRUCT_KERNEL(lod_reset_grad, CPU, ALL_LAYOUT, ops::LoDResetGradKernel, - plat::float16, + phi::dtype::float16, float, double, int, diff --git a/paddle/fluid/operators/lod_reset_op.h b/paddle/fluid/operators/lod_reset_op.h index a468577ab9aa1..acba05514226b 100644 --- a/paddle/fluid/operators/lod_reset_op.h +++ b/paddle/fluid/operators/lod_reset_op.h @@ -54,7 +54,7 @@ class LoDResetKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( static_cast(last_level.back()), in->dims()[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last value of Input(Y)'s last level LoD should be equal " "to the first dimension of Input(X). But received the last " "value of Input(Y)'s last level LoD is %d, the first dimension " @@ -79,20 +79,20 @@ class LoDResetKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( level0.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of target LoD should be greater than 1. But received the " "size of target LoD is %d.", level0.size())); PADDLE_ENFORCE_EQ(static_cast(level0[0]), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Target LoD should be a vector starting from 0. But " "target LoD starts from %d.", static_cast(level0[0]))); PADDLE_ENFORCE_EQ( static_cast(level0.back()), in->dims()[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last value of 'Target LoD''s last level LoD should be equal " "to the first dimension of Input(X). 
But received the 'Target LoD' " "is %s, Input(X)'s shape is %s.", @@ -101,7 +101,7 @@ class LoDResetKernel : public framework::OpKernel { for (size_t i = 0; i < level0.size() - 1; ++i) { PADDLE_ENFORCE_GE(level0[i + 1], level0[i], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'Target LoD' should be an ascending " "vector. But received the Target LoD is %s.", common::make_ddim(level0))); diff --git a/paddle/fluid/operators/lod_tensor_to_array_op.cc b/paddle/fluid/operators/lod_tensor_to_array_op.cc index 94b0319729117..42f6a4786fb25 100644 --- a/paddle/fluid/operators/lod_tensor_to_array_op.cc +++ b/paddle/fluid/operators/lod_tensor_to_array_op.cc @@ -70,7 +70,7 @@ struct LoDTensorToArrayFunctor { Apply(static_cast(dev_ctx)); #else PADDLE_THROW( - platform::errors::Unavailable("Paddle is not compiled with CUDA.")); + phi::errors::Unavailable("Paddle is not compiled with CUDA.")); #endif } } @@ -126,11 +126,11 @@ class LoDTensorToArrayOp : public framework::OperatorBase { PADDLE_ENFORCE_LT( rank_level, x.lod().size(), - platform::errors::InvalidArgument("Input should be a phi::DenseTensor, " - "and its lod_level should be at " - "least %d, but given is %d.", - rank_level + 1, - x.lod().size())); + phi::errors::InvalidArgument("Input should be a phi::DenseTensor, " + "and its lod_level should be at " + "least %d, but given is %d.", + rank_level + 1, + x.lod().size())); out.resize(max_seq_len); std::vector> copy_ranges(max_seq_len); @@ -215,18 +215,18 @@ class LoDTensorToArrayInferShape : public framework::InferShapeBase { PADDLE_ENFORCE_EQ( context->HasInput("X"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(X) of LoDTensorToArrayOp should not be null.")); PADDLE_ENFORCE_EQ( context->HasInput("RankTable"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(RankTable) of LoDTensorToArrayOp should not be null.")); PADDLE_ENFORCE_EQ( context->HasOutput("Out"), true, - platform::errors::NotFound( + 
phi::errors::NotFound( "Output(Out) of LoDTensorToArrayOp should not be null.")); auto x_dim = context->GetInputDim("X"); diff --git a/paddle/fluid/operators/lookup_table_dequant_op.cc b/paddle/fluid/operators/lookup_table_dequant_op.cc index 93826aab0d573..6f780b946eae8 100644 --- a/paddle/fluid/operators/lookup_table_dequant_op.cc +++ b/paddle/fluid/operators/lookup_table_dequant_op.cc @@ -30,17 +30,17 @@ class LookupTableDequantOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("W"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(W) of LookupTableDequantOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("Ids"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Ids) of LookupTableDequantOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of LookupTableDequantOp should not be null.")); auto table_dims = ctx->GetInputDim("W"); @@ -50,7 +50,7 @@ class LookupTableDequantOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( table_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The dimensions of the 'lookup table' must be 2. " "But received lookup table's dimensions = %d, " "lookup table's shape = [%s].", @@ -59,7 +59,7 @@ class LookupTableDequantOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ids_dims[ids_rank - 1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The last dimensions of the 'Ids' tensor must be 1. 
" "But received Ids's last dimensions = %d, Ids's shape = [%s].", ids_dims[ids_rank - 1], @@ -69,7 +69,7 @@ class LookupTableDequantOp : public framework::OperatorWithKernel { common::vectorize(common::slice_ddim(ids_dims, 0, ids_rank - 1)); PADDLE_ENFORCE_GE(table_dims[1], 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "the second dim of table_dims should be " "greater or equal to 2, but the actual shape " "is [%s]", diff --git a/paddle/fluid/operators/lookup_table_dequant_op.h b/paddle/fluid/operators/lookup_table_dequant_op.h index 2f5a3d0fd7a16..191f05597668c 100644 --- a/paddle/fluid/operators/lookup_table_dequant_op.h +++ b/paddle/fluid/operators/lookup_table_dequant_op.h @@ -65,7 +65,7 @@ class LookupTableDequantKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( table_var->Type(), framework::VarTypeTrait::kId, - platform::errors::InvalidArgument("lookup table must be LodTensor")); + phi::errors::InvalidArgument("lookup table must be LodTensor")); auto *table_t = context.Input("W"); int64_t row_number = table_t->dims()[0]; int64_t quant_number = table_t->dims()[1]; @@ -81,7 +81,7 @@ class LookupTableDequantKernel : public framework::OpKernel { PADDLE_ENFORCE_LT( ids[i], row_number, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -90,7 +90,7 @@ class LookupTableDequantKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( ids[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. 
Please check input " "value.", diff --git a/paddle/fluid/operators/lookup_table_op.cc b/paddle/fluid/operators/lookup_table_op.cc index a8185691c45aa..6818b363bc89a 100644 --- a/paddle/fluid/operators/lookup_table_op.cc +++ b/paddle/fluid/operators/lookup_table_op.cc @@ -19,7 +19,7 @@ limitations under the License. */ #include "paddle/fluid/framework/no_need_buffer_vars_inference.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/framework/var_type_inference.h" -#include "paddle/fluid/platform/bfloat16.h" +#include "paddle/phi/common/bfloat16.h" namespace paddle { namespace operators { @@ -40,7 +40,7 @@ class LookupTableOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( table_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The dimensions of the 'lookup table' must be 2. " "But received lookup table's dimensions = %d, " "lookup table's shape = [%s].", @@ -49,7 +49,7 @@ class LookupTableOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ids_dims[ids_rank - 1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The last dimensions of the 'Ids' tensor must be 1. 
" "But received Ids's last dimensions = %d, Ids's shape = [%s].", ids_dims[ids_rank - 1], @@ -239,11 +239,11 @@ REGISTER_OP_CPU_KERNEL(lookup_table, ops::LookupTableKernel, ops::LookupTableKernel, ops::LookupTableKernel, - ops::LookupTableKernel); + ops::LookupTableKernel); REGISTER_OP_CPU_KERNEL(lookup_table_grad, ops::LookupTableGradKernel, ops::LookupTableGradKernel, - ops::LookupTableGradKernel); + ops::LookupTableGradKernel); /* ========================== register checkpoint ===========================*/ diff --git a/paddle/fluid/operators/lookup_table_op.cu b/paddle/fluid/operators/lookup_table_op.cu index ba8af995429a3..46ae30754a933 100644 --- a/paddle/fluid/operators/lookup_table_op.cu +++ b/paddle/fluid/operators/lookup_table_op.cu @@ -15,8 +15,8 @@ limitations under the License. */ #include "paddle/fluid/operators/lookup_table_op.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/float16.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" +#include "paddle/phi/common/float16.h" namespace paddle { namespace operators { @@ -195,7 +195,7 @@ class LookupTableGradCUDAKernel : public framework::OpKernel { common::flatten_to_2d(d_output_dims, d_output_dims.size() - 1); PADDLE_ENFORCE_EQ(d_table_value->dims(), d_output_dims_2d, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The shape of lookup_table@Grad and " "output@Grad should be same. 
" "But received lookup_table@Grad's shape = [%s], " @@ -252,10 +252,10 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(lookup_table, ops::LookupTableCUDAKernel, ops::LookupTableCUDAKernel, - ops::LookupTableCUDAKernel, + ops::LookupTableCUDAKernel, ops::LookupTableCUDAKernel, ops::LookupTableCUDAKernel); REGISTER_OP_CUDA_KERNEL(lookup_table_grad, ops::LookupTableGradCUDAKernel, ops::LookupTableGradCUDAKernel, - ops::LookupTableGradCUDAKernel); + ops::LookupTableGradCUDAKernel); diff --git a/paddle/fluid/operators/lookup_table_op.h b/paddle/fluid/operators/lookup_table_op.h index 21f0bf6a957ae..f4e48065742ca 100644 --- a/paddle/fluid/operators/lookup_table_op.h +++ b/paddle/fluid/operators/lookup_table_op.h @@ -64,7 +64,7 @@ class LookupTableKernel : public framework::OpKernel { PADDLE_ENFORCE_LT( ids[i], row_number, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -73,7 +73,7 @@ class LookupTableKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( ids[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -99,7 +99,7 @@ class LookupTableKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( ids[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0. But received %ld", ids[i])); @@ -129,14 +129,14 @@ class LookupTableKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( ids[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0. 
But received %ld", ids[i])); PADDLE_ENFORCE_GE( id_index, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "the input key should be exists. But received %d.", id_index)); @@ -173,7 +173,7 @@ class LookupTableGradKernel : public framework::OpKernel { auto *table_t = context.Input("W"); table_dim = table_t->value().dims(); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "The parameter W of a LookupTable " "must be either phi::DenseTensor or SelectedRows")); } @@ -210,7 +210,7 @@ class LookupTableGradKernel : public framework::OpKernel { common::flatten_to_2d(d_output_dims, d_output_dims.size() - 1); PADDLE_ENFORCE_EQ(d_table_value->dims(), d_output_dims_2d, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The shape of lookup_table@Grad and " "output@Grad should be same. " "But received lookup_table@Grad's shape = [%s], " @@ -243,7 +243,7 @@ class LookupTableGradKernel : public framework::OpKernel { PADDLE_ENFORCE_LT( ids_data[i], N, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -252,7 +252,7 @@ class LookupTableGradKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( ids_data[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. Please check input" "value.", diff --git a/paddle/fluid/operators/lookup_table_v2_op.cu b/paddle/fluid/operators/lookup_table_v2_op.cu index edd8b20da160c..137d6bea417c3 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.cu +++ b/paddle/fluid/operators/lookup_table_v2_op.cu @@ -15,8 +15,8 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/lookup_table_v2_op.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/float16.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" +#include "paddle/phi/common/float16.h" namespace paddle { namespace operators { @@ -190,7 +190,7 @@ struct LookupTableV2GradCUDAFunctor { common::flatten_to_2d(d_output_dims, d_output_dims.size() - 1); PADDLE_ENFORCE_EQ(d_table_value->dims(), d_output_dims_2d, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The shape of lookup_table@Grad and " "output@Grad should be same. " "But received lookup_table@Grad's shape = [%s], " diff --git a/paddle/fluid/operators/lookup_table_v2_op.h b/paddle/fluid/operators/lookup_table_v2_op.h index 82dbac8b21dfc..cce29cb715563 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.h +++ b/paddle/fluid/operators/lookup_table_v2_op.h @@ -78,7 +78,7 @@ struct LookupTableV2CPUFunctor { PADDLE_ENFORCE_LT( ids[i], row_number, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -87,7 +87,7 @@ struct LookupTableV2CPUFunctor { PADDLE_ENFORCE_GE( ids[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -113,7 +113,7 @@ struct LookupTableV2CPUFunctor { PADDLE_ENFORCE_GE( ids[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0. But received %ld", ids[i])); @@ -121,7 +121,7 @@ struct LookupTableV2CPUFunctor { PADDLE_ENFORCE_GE( id_index, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "the input key should be exists. 
But received %d.", id_index)); @@ -173,7 +173,7 @@ struct LookupTableV2GradCPUFunctor { auto *table_t = context_.Input("W"); table_dim = table_t->value().dims(); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "The parameter W of a LookupTableV2 " "must be either phi::DenseTensor or SelectedRows")); } @@ -209,7 +209,7 @@ struct LookupTableV2GradCPUFunctor { common::flatten_to_2d(d_output_dims, d_output_dims.size() - 1); PADDLE_ENFORCE_EQ(d_table_value->dims(), d_output_dims_2d, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The shape of lookup_table@Grad and " "output@Grad should be same. " "But received lookup_table@Grad's shape = [%s], " @@ -242,7 +242,7 @@ struct LookupTableV2GradCPUFunctor { PADDLE_ENFORCE_LT( ids_data[i], N, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -251,7 +251,7 @@ struct LookupTableV2GradCPUFunctor { PADDLE_ENFORCE_GE( ids_data[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", diff --git a/paddle/fluid/operators/lrn_op.cc b/paddle/fluid/operators/lrn_op.cc index bf4c72a2133b6..705af5f8d0587 100644 --- a/paddle/fluid/operators/lrn_op.cc +++ b/paddle/fluid/operators/lrn_op.cc @@ -21,7 +21,7 @@ limitations under the License. 
*/ #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/math_function.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif namespace paddle { @@ -199,23 +199,23 @@ class LRNOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dim.size(), 4, - platform::errors::InvalidArgument("Input(input) rank should be 4, " - "but received input rank (%d) != 4", - x_dim.size())); + phi::errors::InvalidArgument("Input(input) rank should be 4, " + "but received input rank (%d) != 4", + x_dim.size())); int n = ctx->Attrs().Get("n"); - PADDLE_ENFORCE_GT(n, - 0UL, - platform::errors::InvalidArgument( - "Argument(n) should be positive, " - "but received n(%d) not greater than 0", - n)); - PADDLE_ENFORCE_EQ(n % 2, - 1UL, - platform::errors::InvalidArgument( - "Argument(n) should be odd value, " - "but received n(%d) is not an odd value", - n)); + PADDLE_ENFORCE_GT( + n, + 0UL, + phi::errors::InvalidArgument("Argument(n) should be positive, " + "but received n(%d) not greater than 0", + n)); + PADDLE_ENFORCE_EQ( + n % 2, + 1UL, + phi::errors::InvalidArgument("Argument(n) should be odd value, " + "but received n(%d) is not an odd value", + n)); ctx->SetOutputDim("Out", x_dim); ctx->ShareLoD("X", /*->*/ "Out"); diff --git a/paddle/fluid/operators/lrn_op.h b/paddle/fluid/operators/lrn_op.h index 4d1cc268d48b6..063ec6e445044 100644 --- a/paddle/fluid/operators/lrn_op.h +++ b/paddle/fluid/operators/lrn_op.h @@ -78,21 +78,21 @@ class LRNKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( alpha, 0UL, - platform::errors::InvalidArgument("Argument(alpha) should >= 0.0, " - "but received alpha(%d) less than 0", - alpha)); + phi::errors::InvalidArgument("Argument(alpha) should >= 0.0, " + "but received alpha(%d) less than 0", + alpha)); PADDLE_ENFORCE_GE( beta, 0UL, - platform::errors::InvalidArgument("Argument(beta) should >= 0.0, " - "but received beta(%d) less than 0", 
- beta)); + phi::errors::InvalidArgument("Argument(beta) should >= 0.0, " + "but received beta(%d) less than 0", + beta)); PADDLE_ENFORCE_GE( k, 0UL, - platform::errors::InvalidArgument("Argument(k) should >= 0.0, " - "but received k(%d) less than 0", - k)); + phi::errors::InvalidArgument("Argument(k) should >= 0.0, " + "but received k(%d) less than 0", + k)); LRNFunctor f; f(ctx, x, out, mid, N, C, H, W, n, k, alpha, beta, data_layout); @@ -165,7 +165,7 @@ class LRNGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( !ctx.Attr("is_test"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "is_test attribute should be set to False in training phase. " "but received is_test == True in training phase.")); diff --git a/paddle/fluid/operators/lstm_op.cc b/paddle/fluid/operators/lstm_op.cc index 8bdb455375bee..a34fc82fe177c 100644 --- a/paddle/fluid/operators/lstm_op.cc +++ b/paddle/fluid/operators/lstm_op.cc @@ -46,20 +46,20 @@ class LSTMOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( in_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X)'s rank must be 2, but received %d.", in_dims.size())); if (ctx->HasInput("H0")) { PADDLE_ENFORCE_EQ( ctx->HasInput("C0"), true, - platform::errors::NotFound("Input(Cell) and Input(Hidden) of LSTM " - "should not be null at the same time.")); + phi::errors::NotFound("Input(Cell) and Input(Hidden) of LSTM " + "should not be null at the same time.")); auto h_dims = ctx->GetInputDim("H0"); auto c_dims = ctx->GetInputDim("C0"); PADDLE_ENFORCE_EQ(h_dims, c_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension of Input(H0) and Input(C0) should " "be the same, but received [%s] (H0) vs [%s] (C0).", h_dims, @@ -71,19 +71,19 @@ class LSTMOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( w_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(Weight) should be 
2, but received %d.", w_dims.size())); PADDLE_ENFORCE_EQ(w_dims[0], frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(Weight) should be %d, " "but received %d.", frame_size, w_dims[0])); PADDLE_ENFORCE_EQ(w_dims[1], 4 * frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Weight) should be 4 * " "%d, but received %d.", frame_size, @@ -93,13 +93,13 @@ class LSTMOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( b_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(Bias) should be 2, but received %d.", b_dims.size())); PADDLE_ENFORCE_EQ( b_dims[0], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(Bias) should be 1, but received %d.", b_dims[0])); @@ -107,7 +107,7 @@ class LSTMOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( b_dims[1], 7 * frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Bias) should be 7 * %d if enable " "peepholes connection, but received %d.", frame_size, @@ -116,7 +116,7 @@ class LSTMOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( b_dims[1], 4 * frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Bias) should be 4 * %d if disable " "peepholes connection, but received %d.", frame_size, diff --git a/paddle/fluid/operators/lstm_op.h b/paddle/fluid/operators/lstm_op.h index 0e068c47647e3..278fdbdb41761 100644 --- a/paddle/fluid/operators/lstm_op.h +++ b/paddle/fluid/operators/lstm_op.h @@ -252,7 +252,7 @@ class LSTMGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( frame_size, out_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(" + framework::GradVarName("Hidden") + ") should be %d, but received %d 
in LSTM@Grad operator.", diff --git a/paddle/fluid/operators/margin_cross_entropy_op.cu b/paddle/fluid/operators/margin_cross_entropy_op.cu index d2c026a9042c7..77cd6433c69e9 100644 --- a/paddle/fluid/operators/margin_cross_entropy_op.cu +++ b/paddle/fluid/operators/margin_cross_entropy_op.cu @@ -98,7 +98,7 @@ void GetClassInterval(const gpuStream_t& stream, if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -109,7 +109,7 @@ void GetClassInterval(const gpuStream_t& stream, comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - paddle::platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); } else { @@ -287,7 +287,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx, PADDLE_ENFORCE_EQ( comm_context_manager.Has(std::to_string(ring_id)), true, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -299,7 +299,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx, PADDLE_ENFORCE_NE( comm_ctx, nullptr, - paddle::platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); } else { diff --git a/paddle/fluid/operators/match_matrix_tensor_op.cc b/paddle/fluid/operators/match_matrix_tensor_op.cc index 746a28ed588d5..e790262a0fd78 100644 --- a/paddle/fluid/operators/match_matrix_tensor_op.cc +++ b/paddle/fluid/operators/match_matrix_tensor_op.cc @@ -36,7 +36,7 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ(x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(X) should be equal to 2, " "but received %d.", x_dims.size())); @@ -44,7 +44,7 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { auto y_dims = ctx->GetInputDim("Y"); PADDLE_ENFORCE_EQ(y_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(Y) should be equal to 2, " "but received %d.", y_dims.size())); @@ -52,7 +52,7 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { auto w_dims = ctx->GetInputDim("W"); PADDLE_ENFORCE_EQ(w_dims.size(), 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(W) should be equal to 3, " "but received %d.", w_dims.size())); @@ -61,7 +61,7 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( w_dims[0], x_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(W) should be equal to the second " "dimension of Input(X). 
But received the first dimension of Input(W) " "is %d, the second dimension of Input(X) is %d.", @@ -70,7 +70,7 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( w_dims[1], dim_t, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(W) should be equal to 'dim_t', but " "received the second dimension of Input(W) is %d, 'dim_t' is %d.", w_dims[1], @@ -78,7 +78,7 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( w_dims[2], y_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last dimension of Input(W) should be equal to " "the second dimension of Input(Y). But received the last dimension " "of Input(W) is %d, the second dimension of Input(Y) is %d.", @@ -93,19 +93,19 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { const auto& x_lod = x_var->Get().lod(); PADDLE_ENFORCE_EQ(x_lod.empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(X) should hold LoD information, but " "received Input(X).lod() is empty.")); const auto& x_lod_0 = x_lod[0]; PADDLE_ENFORCE_GE(x_lod_0.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(X)'s LoD data should be " "equal to 2, but received %d.", x_lod_0.size())); PADDLE_ENFORCE_EQ(x_dims[0], static_cast(x_lod_0.back()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last element of Input(X)'s LoD data should be " "equal to the first dimension of Input(X). 
" "But received the last element of Input(X)'s LoD " @@ -118,19 +118,19 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { const auto& y_lod = y_var->Get().lod(); PADDLE_ENFORCE_EQ(y_lod.empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Y) should hold LoD information, but " "received Input(Y).lod() is empty.")); const auto& y_lod_0 = y_lod[0]; PADDLE_ENFORCE_GE(y_lod_0.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(Y)'s LoD data should be " "equal to 2, but received %d.", y_lod_0.size())); PADDLE_ENFORCE_EQ(y_dims[0], static_cast(y_lod_0.back()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last element of Input(Y)'s LoD data should be " "equal to the first dimension of Input(Y). " "But received the last element of Input(Y)'s LoD " @@ -140,7 +140,7 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ(x_lod_0.size(), y_lod_0.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(X)'s and Input(Y)'s LoD " "data should be equal. 
" "But received the dimensions of Input(X)'s LoD is " @@ -164,17 +164,17 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_GE( x_desc->GetLoDLevel(), 1, - platform::errors::InvalidArgument("The LoD level of Input(X) should be " - "greater than 1, but received %d.", - x_desc->GetLoDLevel())); + phi::errors::InvalidArgument("The LoD level of Input(X) should be " + "greater than 1, but received %d.", + x_desc->GetLoDLevel())); framework::VarDesc* y_desc = PADDLE_GET(framework::VarDesc*, ctx->GetInputVarPtrs("Y")[0]); PADDLE_ENFORCE_GE( y_desc->GetLoDLevel(), 1, - platform::errors::InvalidArgument("The LoD level of Input(Y) should be " - "greater than 1, but received %d.", - y_desc->GetLoDLevel())); + phi::errors::InvalidArgument("The LoD level of Input(Y) should be " + "greater than 1, but received %d.", + y_desc->GetLoDLevel())); ctx->ShareLoD("X", "Out"); } @@ -255,20 +255,20 @@ class CPUMatchMatrixTensorOPKernel : public framework::OpKernel { const auto& x_lod = x->lod(); PADDLE_ENFORCE_EQ(x_lod.empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(X) should hold LoD information, but " "received Input(X).lod() is empty.")); const auto& x_lod_0 = x_lod[0]; PADDLE_ENFORCE_GE(x_lod_0.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(X)'s LoD data should be " "equal to 2, but received %d.", x_lod_0.size())); auto x_dims = x->dims(); PADDLE_ENFORCE_EQ(x_dims[0], static_cast(x_lod_0.back()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last element of Input(X)'s LoD data should be " "equal to the first dimension of Input(X). 
" "But received the last element of Input(X)'s LoD " @@ -278,20 +278,20 @@ class CPUMatchMatrixTensorOPKernel : public framework::OpKernel { const auto& y_lod = y->lod(); PADDLE_ENFORCE_EQ(y_lod.empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Y) should hold LoD information, but " "received Input(Y).lod() is empty.")); const auto& y_lod_0 = y_lod[0]; PADDLE_ENFORCE_GE(y_lod_0.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(Y)'s LoD data should be " "equal to 2, but received %d.", y_lod_0.size())); auto y_dims = y->dims(); PADDLE_ENFORCE_EQ(y_dims[0], static_cast(y_lod_0.back()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last element of Input(Y)'s LoD data should be " "equal to the first dimension of Input(Y). " "But received the last element of Input(Y)'s LoD " @@ -301,7 +301,7 @@ class CPUMatchMatrixTensorOPKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(x_lod_0.size(), y_lod_0.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(X)'s and Input(Y)'s LoD " "data should be equal. 
" "But received the dimensions of Input(X)'s LoD is " diff --git a/paddle/fluid/operators/math/beam_search.cc b/paddle/fluid/operators/math/beam_search.cc index aeff6c394c429..974d5d5d5a3c8 100644 --- a/paddle/fluid/operators/math/beam_search.cc +++ b/paddle/fluid/operators/math/beam_search.cc @@ -100,10 +100,10 @@ class BeamSearchFunctor { lod[0].assign(high_level.begin(), high_level.end()); lod[1].assign(low_level.begin(), low_level.end()); if (!framework::CheckLoD(lod)) { - PADDLE_THROW(platform::errors::InvalidArgument( - "lod %s is not right in" - " beam_search, please check your code.", - framework::LoDToString(lod))); + PADDLE_THROW( + phi::errors::InvalidArgument("lod %s is not right in" + " beam_search, please check your code.", + framework::LoDToString(lod))); } selected_ids->set_lod(lod); selected_scores->set_lod(lod); diff --git a/paddle/fluid/operators/math/beam_search.cu b/paddle/fluid/operators/math/beam_search.cu index 098f40ab526b1..702c34ce2161f 100644 --- a/paddle/fluid/operators/math/beam_search.cu +++ b/paddle/fluid/operators/math/beam_search.cu @@ -504,17 +504,17 @@ class BeamSearchFunctor { num_used_threads)); } } else { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Not implemented other number of sequences yet.")); } context.Wait(); mix_vector.CopyToCPU(); if (!framework::CheckLoD(selected_lod)) { - PADDLE_THROW(platform::errors::InvalidArgument( - "lod %s is not right in" - " beam_search, please check your code.", - framework::LoDToString(selected_lod))); + PADDLE_THROW( + phi::errors::InvalidArgument("lod %s is not right in" + " beam_search, please check your code.", + framework::LoDToString(selected_lod))); } selected_ids->set_lod(selected_lod); diff --git a/paddle/fluid/operators/math/beam_search_xpu.cc b/paddle/fluid/operators/math/beam_search_xpu.cc index 4ac0e3d886017..33484d139982c 100644 --- a/paddle/fluid/operators/math/beam_search_xpu.cc +++ 
b/paddle/fluid/operators/math/beam_search_xpu.cc @@ -41,7 +41,7 @@ void CopyDataByCondition(const T *x, T **y, int len, const Place &place) { PADDLE_ENFORCE_EQ( r, xpu::Error_t::SUCCESS, - platform::errors::External("Copy data form xpu to cpu failed")); + phi::errors::External("Copy data from xpu to cpu failed")); } } @@ -125,10 +125,10 @@ class BeamSearchFunctor { lod[0].assign(high_level.begin(), high_level.end()); lod[1].assign(low_level.begin(), low_level.end()); if (!framework::CheckLoD(lod)) { - PADDLE_THROW(platform::errors::InvalidArgument( - "lod %s is not right in" - " beam_search, please check your code.", - framework::LoDToString(lod))); + PADDLE_THROW( + phi::errors::InvalidArgument("lod %s is not right in" + " beam_search, please check your code.", + framework::LoDToString(lod))); } selected_ids->set_lod(lod); selected_scores->set_lod(lod); diff --git a/paddle/fluid/operators/math/bert_encoder_functor.h b/paddle/fluid/operators/math/bert_encoder_functor.h index 76e27380b90e2..32b36b9c1515e 100644 --- a/paddle/fluid/operators/math/bert_encoder_functor.h +++ b/paddle/fluid/operators/math/bert_encoder_functor.h @@ -28,7 +28,7 @@ namespace cub = hipcub; #endif #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace paddle { namespace operators { @@ -39,7 +39,7 @@ struct CUDATypeTraits; template <> struct CUDATypeTraits { - typedef platform::float16 TYPE; + typedef phi::dtype::float16 TYPE; }; template <> diff --git a/paddle/fluid/operators/math/concat_and_split.cc b/paddle/fluid/operators/math/concat_and_split.cc index 87b3695553356..7a37d929be71d 100644 --- a/paddle/fluid/operators/math/concat_and_split.cc +++ b/paddle/fluid/operators/math/concat_and_split.cc @@ -110,7 +110,7 @@ class ConcatFunctor { PADDLE_ENFORCE_EQ( r, XPU_SUCCESS, - platform::errors::External( + phi::errors::External( "XPU API return wrong value[%d %s], please check whether " "Baidu Kunlun Card is 
properly installed.", r, @@ -169,7 +169,7 @@ class SplitFunctor { PADDLE_ENFORCE_EQ( r, XPU_SUCCESS, - platform::errors::External( + phi::errors::External( "XPU API return wrong value[%d %s], please check whether " "Baidu Kunlun Card is properly installed.", r, @@ -190,8 +190,8 @@ FOR_ALL_TYPES(DEFINE_FUNCTOR); template class SplitFunctor; DEFINE_XPU_FUNCTOR(float) -DEFINE_XPU_FUNCTOR(platform::float16) -DEFINE_XPU_FUNCTOR(platform::bfloat16) +DEFINE_XPU_FUNCTOR(phi::dtype::float16) +DEFINE_XPU_FUNCTOR(phi::dtype::bfloat16) #endif } // namespace math } // namespace operators diff --git a/paddle/fluid/operators/math/context_project.h b/paddle/fluid/operators/math/context_project.h index 20211160b7e5e..f510034a7ea0c 100644 --- a/paddle/fluid/operators/math/context_project.h +++ b/paddle/fluid/operators/math/context_project.h @@ -144,7 +144,7 @@ class ContextProjectFunctor { if (padding_trainable) { PADDLE_ENFORCE_NOT_NULL( padding_data, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor 'padding_data' should not be NULL.")); for (int i = 0; i < static_cast(lod_level_0.size()) - 1; ++i) { if (lod_level_0[i] == lod_level_0[i + 1]) continue; diff --git a/paddle/fluid/operators/math/eigen_values_vectors.h b/paddle/fluid/operators/math/eigen_values_vectors.h index 8d6b0b99f9d52..c4a22ece92b54 100644 --- a/paddle/fluid/operators/math/eigen_values_vectors.h +++ b/paddle/fluid/operators/math/eigen_values_vectors.h @@ -38,7 +38,7 @@ static void CheckEighResult(const int batch, const int info) { PADDLE_ENFORCE_LE( info, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "For batch [%d]: the [%d] off-diagonal elements of an intermediate" "tridiagonal form did not converge to zero", batch, @@ -46,7 +46,7 @@ static void CheckEighResult(const int batch, const int info) { PADDLE_ENFORCE_GE( info, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "For batch [%d]: the [%d] argument had an 
illegal value", batch, info)); @@ -160,7 +160,7 @@ struct MatrixEighFunctor { } if (has_vectors) { PADDLE_ENFORCE_NOT_NULL(eigen_vectors, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "When has_vectors is true," "the eigenvectors needs to be calculated, " "so the eigenvectors must be provided.")); @@ -293,7 +293,7 @@ struct MatrixEighFunctor { } if (has_vectors) { PADDLE_ENFORCE_NOT_NULL(eigen_vectors, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "When has_vectors is true," "the eigenvectors needs to be calculated," "so the eigenvectors must be provided.")); diff --git a/paddle/fluid/operators/math/prelu.cu b/paddle/fluid/operators/math/prelu.cu index 9dc25e30ce9aa..eadfdf8cf39e4 100644 --- a/paddle/fluid/operators/math/prelu.cu +++ b/paddle/fluid/operators/math/prelu.cu @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/prelu.h" -#include "paddle/fluid/platform/bfloat16.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/float16.h" namespace paddle { namespace operators { @@ -135,18 +135,18 @@ void PreluScalarDirectCUDAFunctor::operator()(gpuStream_t stream, } template class PreluChannelWiseDirectCUDAFunctor; -template class PreluChannelWiseDirectCUDAFunctor; -template class PreluChannelWiseDirectCUDAFunctor; +template class PreluChannelWiseDirectCUDAFunctor; +template class PreluChannelWiseDirectCUDAFunctor; template class PreluChannelWiseDirectCUDAFunctor; template class PreluElementWiseDirectCUDAFunctor; -template class PreluElementWiseDirectCUDAFunctor; -template class PreluElementWiseDirectCUDAFunctor; +template class PreluElementWiseDirectCUDAFunctor; +template class PreluElementWiseDirectCUDAFunctor; template class PreluElementWiseDirectCUDAFunctor; template class PreluScalarDirectCUDAFunctor; -template class PreluScalarDirectCUDAFunctor; 
-template class PreluScalarDirectCUDAFunctor; +template class PreluScalarDirectCUDAFunctor; +template class PreluScalarDirectCUDAFunctor; template class PreluScalarDirectCUDAFunctor; } // namespace math diff --git a/paddle/fluid/operators/math/sampler.h b/paddle/fluid/operators/math/sampler.h index 9bca69edd1fea..e14e1ca572cab 100644 --- a/paddle/fluid/operators/math/sampler.h +++ b/paddle/fluid/operators/math/sampler.h @@ -36,7 +36,7 @@ class Sampler { PADDLE_ENFORCE_GT( range, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Range should be greater than 0, but received %d.", range)); if (seed == 0) { std::random_device r; diff --git a/paddle/fluid/operators/math/tree2col.cc b/paddle/fluid/operators/math/tree2col.cc index 41c131de0f392..f9950cd95de0b 100644 --- a/paddle/fluid/operators/math/tree2col.cc +++ b/paddle/fluid/operators/math/tree2col.cc @@ -58,7 +58,7 @@ void Tree2ColUtil::construct_tree(const phi::DenseTensor &EdgeSet, const auto &edge_set_dims = EdgeSet.dims(); PADDLE_ENFORCE_EQ(edge_set_dims[1], 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of the EdgeSet shall be 2, but " "got %ld != 2. Please check the input value.", edge_set_dims[1])); diff --git a/paddle/fluid/operators/math/unpooling.cc b/paddle/fluid/operators/math/unpooling.cc index 78c41f1b8387a..a4e64b4d84fc2 100644 --- a/paddle/fluid/operators/math/unpooling.cc +++ b/paddle/fluid/operators/math/unpooling.cc @@ -43,7 +43,7 @@ class Unpool2dMaxFunctor { PADDLE_ENFORCE_LT( index, output_feasize, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "index should less than output tensor height * output tensor " "width. Expected %ld < %ld, but got " "%ld >= %ld. 
Please check input value.", @@ -88,7 +88,7 @@ class Unpool2dMaxGradFunctor { PADDLE_ENFORCE_LT( index, output_feasize, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "index should less than output tensor height * output tensor " "width. Expected %ld < %ld, but got " "%ld >= %ld. Please check input value.", @@ -134,7 +134,7 @@ class Unpool3dMaxFunctor { PADDLE_ENFORCE_LT( index, output_feasize, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "index should less than output tensor depth * output tensor " "height " "* output tensor width. Expected %ld < %ld, but got " @@ -182,7 +182,7 @@ class Unpool3dMaxGradFunctor { PADDLE_ENFORCE_LT( index, output_feasize, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "index should less than output tensor depth * output tensor " "height " "* output tensor width. Expected %ld < %ld, but got " diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc index 895a427bae6e2..c55a1e6b14123 100644 --- a/paddle/fluid/operators/matmul_op.cc +++ b/paddle/fluid/operators/matmul_op.cc @@ -961,7 +961,7 @@ REGISTER_OP_CUDA_KERNEL( matmul, ops::MatMulKernel, ops::MatMulKernel, - ops::MatMulKernel); + ops::MatMulKernel); #endif #if defined(PADDLE_WITH_CUDA) @@ -971,13 +971,13 @@ REGISTER_OP_CUDA_KERNEL( ops::MatMulKernel, ops::MatMulKernel, ops::MatMulKernel, - ops::MatMulKernel); + ops::MatMulKernel); #else REGISTER_OP_CUDA_KERNEL( matmul, ops::MatMulKernel, ops::MatMulKernel, - ops::MatMulKernel); + ops::MatMulKernel); #endif #endif @@ -985,7 +985,7 @@ REGISTER_OP_CUDA_KERNEL( matmul_grad, ops::MatMulGradKernel, ops::MatMulGradKernel, - ops::MatMulGradKernel); + ops::MatMulGradKernel); REGISTER_OP_CUDA_KERNEL(matmul_grad_grad, ops::MatMulDoubleGradKernel, ops::MatMulDoubleGradKernel); diff --git a/paddle/fluid/operators/matmul_op_xpu.cc b/paddle/fluid/operators/matmul_op_xpu.cc index de2aa41d971df..095a90737f9ad 100644 --- 
a/paddle/fluid/operators/matmul_op_xpu.cc +++ b/paddle/fluid/operators/matmul_op_xpu.cc @@ -19,7 +19,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/xpu_api_wrapper.h" +#include "paddle/phi/kernels/xpu/xpu_api_wrapper.h" namespace paddle { namespace operators { @@ -156,12 +156,13 @@ REGISTER_OP_XPU_KERNEL( matmul, ops::MatMulXPUKernel, ops::MatMulXPUKernel, - ops::MatMulXPUKernel); + ops::MatMulXPUKernel); REGISTER_OP_XPU_KERNEL( matmul_grad, ops::MatMulGradXPUKernel, ops::MatMulGradXPUKernel, ops::MatMulGradXPUKernel); + phi::dtype::float16>); #endif diff --git a/paddle/fluid/operators/memcpy_d2h_op.cc b/paddle/fluid/operators/memcpy_d2h_op.cc index 7233e437e147a..9c1087d42b2b3 100644 --- a/paddle/fluid/operators/memcpy_d2h_op.cc +++ b/paddle/fluid/operators/memcpy_d2h_op.cc @@ -67,10 +67,10 @@ class MemcpyD2HKernel { if (x == nullptr) { return; } - PADDLE_ENFORCE_EQ(ctx.HasOutput("Out"), - true, - platform::errors::NotFound( - "Output(Out) of memcpy_d2h_op is not found.")); + PADDLE_ENFORCE_EQ( + ctx.HasOutput("Out"), + true, + phi::errors::NotFound("Output(Out) of memcpy_d2h_op is not found.")); auto *out = ctx.OutputVar("Out"); // Get dev_ctx from ExecutionContext, it's D2H stream auto &dev_ctx = ctx.device_context(); @@ -136,13 +136,13 @@ REGISTER_OP_IPU_KERNEL_FUNCTOR(memcpy_d2h, ops::MemcpyD2HKernel, bool, ops::MemcpyD2HKernel, - paddle::platform::bfloat16, + phi::dtype::bfloat16, ops::MemcpyD2HKernel, paddle::platform::complex, ops::MemcpyD2HKernel, paddle::platform::complex, ops::MemcpyD2HKernel, - plat::float16, + phi::dtype::float16, ops::MemcpyD2HKernel, int16_t, ops::MemcpyD2HKernel); diff --git a/paddle/fluid/operators/memcpy_d2h_op.h b/paddle/fluid/operators/memcpy_d2h_op.h index 4f948e4482f8a..2a69ae556adfd 100644 --- a/paddle/fluid/operators/memcpy_d2h_op.h +++ b/paddle/fluid/operators/memcpy_d2h_op.h @@ -53,7 +53,7 @@ class MemcpyD2HFunctor { void operator()(const 
phi::SelectedRows &rows) const { // (JZ-LIANG) to support SelectedRows - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Memcpy for SelectedRows is NOT support yet.")); } @@ -62,7 +62,7 @@ class MemcpyD2HFunctor { PADDLE_ENFORCE_EQ( true, false, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "Not support type for Memcpy op with type %s", typeid(T).name())); } @@ -76,7 +76,7 @@ class MemcpyD2HFunctor { } else if (dst_place_type_ == 0) { framework::TensorCopy(src, platform::CPUPlace(), dev_ctx_, &dst); } else { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "memcpy dst_place_type: %d is not supported yet.", dst_place_type_)); } // NOTE(Aurelius84): host <-> device memory copies of a memory block of 64 diff --git a/paddle/fluid/operators/memcpy_h2d_op.cc b/paddle/fluid/operators/memcpy_h2d_op.cc index 457b629268659..85cd21831c9b1 100644 --- a/paddle/fluid/operators/memcpy_h2d_op.cc +++ b/paddle/fluid/operators/memcpy_h2d_op.cc @@ -68,10 +68,10 @@ class MemcpyH2DKernel { if (x == nullptr) { return; } - PADDLE_ENFORCE_EQ(ctx.HasOutput("Out"), - true, - platform::errors::NotFound( - "Output(Out) of memcpy_d2h_op is not found.")); + PADDLE_ENFORCE_EQ( + ctx.HasOutput("Out"), + true, + phi::errors::NotFound("Output(Out) of memcpy_d2h_op is not found.")); auto *out = ctx.OutputVar("Out"); // Get dev_ctx from ExecutionContext, it's H2D stream auto &dev_ctx = ctx.device_context(); @@ -137,13 +137,13 @@ REGISTER_OP_IPU_KERNEL_FUNCTOR(memcpy_h2d, ops::MemcpyH2DKernel, bool, ops::MemcpyH2DKernel, - paddle::platform::bfloat16, + phi::dtype::bfloat16, ops::MemcpyH2DKernel, paddle::platform::complex, ops::MemcpyH2DKernel, paddle::platform::complex, ops::MemcpyH2DKernel, - plat::float16, + phi::dtype::float16, ops::MemcpyH2DKernel, int16_t, ops::MemcpyH2DKernel); diff --git a/paddle/fluid/operators/memcpy_h2d_op.h b/paddle/fluid/operators/memcpy_h2d_op.h index 
5f480461d77cd..6b83ab1541976 100644 --- a/paddle/fluid/operators/memcpy_h2d_op.h +++ b/paddle/fluid/operators/memcpy_h2d_op.h @@ -53,7 +53,7 @@ class MemcpyH2DFunctor { framework::TensorCopy( lod_tensor, dev_ctx_.GetPlace(), dev_ctx_, &out_tensor); } else { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "memcpy dst_place_type: %d is not supported yet.", dst_place_type_)); } out_tensor.set_lod(lod_tensor.lod()); @@ -61,7 +61,7 @@ class MemcpyH2DFunctor { void operator()(const phi::SelectedRows &rows) const { // (JZ-LIANG) to support SelectedRows - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Memcpy for SelectedRows is NOT support yet.")); } @@ -70,7 +70,7 @@ class MemcpyH2DFunctor { PADDLE_ENFORCE_EQ( true, false, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "Not support type for Memcpy op with type %s", typeid(T).name())); } diff --git a/paddle/fluid/operators/memcpy_op.cc b/paddle/fluid/operators/memcpy_op.cc index bb3e29df16d53..8031e318f3af9 100644 --- a/paddle/fluid/operators/memcpy_op.cc +++ b/paddle/fluid/operators/memcpy_op.cc @@ -87,7 +87,7 @@ class MemcpyKernel { PADDLE_ENFORCE_EQ( ctx.HasOutput("Out"), true, - platform::errors::NotFound("Output(Out) of memcpy_op is not found.")); + phi::errors::NotFound("Output(Out) of memcpy_op is not found.")); auto *out = ctx.OutputVar("Out"); platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto &dev_ctx = *pool.Get(ctx.GetPlace()); diff --git a/paddle/fluid/operators/memcpy_op.h b/paddle/fluid/operators/memcpy_op.h index bfdd43eaaa519..81432dcb30f6b 100644 --- a/paddle/fluid/operators/memcpy_op.h +++ b/paddle/fluid/operators/memcpy_op.h @@ -66,7 +66,7 @@ class MemcpyFunctor { lod_tensor, dev_ctx_.GetPlace(), dev_ctx_, &out_tensor); #endif } else { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "memcpy dst_place_type: %d is not 
supported yet.", dst_place_type_)); } out_tensor.set_lod(lod_tensor.lod()); @@ -74,7 +74,7 @@ class MemcpyFunctor { void operator()(const phi::SelectedRows &rows) const { // (JZ-LIANG) to support SelectedRows - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Memcpy for SelectedRows is NOT support yet.")); } @@ -83,7 +83,7 @@ class MemcpyFunctor { PADDLE_ENFORCE_EQ( true, false, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "Not support type for Memcpy op with type %s", typeid(T).name())); } diff --git a/paddle/fluid/operators/merge_lod_tensor_op.cc b/paddle/fluid/operators/merge_lod_tensor_op.cc index 3ed27460e16b6..2783aeea44e1d 100644 --- a/paddle/fluid/operators/merge_lod_tensor_op.cc +++ b/paddle/fluid/operators/merge_lod_tensor_op.cc @@ -60,7 +60,7 @@ class MergeLoDTensorOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( in_true.numel() || in_false.numel(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(InTrue) or Input(InFalse) should be initialized.")); auto &mask_dim = mask.dims(); @@ -72,7 +72,7 @@ class MergeLoDTensorOp : public framework::OperatorBase { framework::TensorCopy( mask, platform::CPUPlace(), dev_ctx, cpu_mask.get()); #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "Not supported GPU, Please recompile or reinstall paddle with CUDA " "support.")); #endif @@ -129,7 +129,7 @@ class MergeLoDTensorOp : public framework::OperatorBase { PADDLE_ENFORCE_GE(end_offset, start_offset, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The end offset less than start offset, end offset " "is %d, start offset is %d.", end_offset, @@ -216,7 +216,7 @@ class MergeLoDTensorInferShape : public framework::InferShapeBase { auto mask_dim = context->GetInputDim("Mask"); PADDLE_ENFORCE_EQ(mask_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "If you 
are using IfElse OP:" "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " "ie.true_block():\n out_1 = ie.input(x)\n\n" @@ -227,7 +227,7 @@ class MergeLoDTensorInferShape : public framework::InferShapeBase { if (context->IsRuntime() || mask_dim[1] > 0) { PADDLE_ENFORCE_EQ(mask_dim[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "If you are using IfElse OP:" "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " "ie.true_block():\n out_1 = ie.input(x)\n\n" diff --git a/paddle/fluid/operators/metrics/precision_recall_op.cc b/paddle/fluid/operators/metrics/precision_recall_op.cc index 63385cb59171f..95a66cb2edd1d 100644 --- a/paddle/fluid/operators/metrics/precision_recall_op.cc +++ b/paddle/fluid/operators/metrics/precision_recall_op.cc @@ -25,32 +25,32 @@ class PrecisionRecallOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("MaxProbs"), true, - platform::errors::NotFound( + phi::errors::NotFound( "PrecisionRecallOp Input(MaxProbs) should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("Indices"), true, - platform::errors::NotFound( + phi::errors::NotFound( "PrecisionRecallOp Input(Indices) should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("Labels"), true, - platform::errors::NotFound( + phi::errors::NotFound( "PrecisionRecallOp Input(Labels) should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("BatchMetrics"), true, - platform::errors::NotFound( + phi::errors::NotFound( "PrecisionRecallOp Output(BatchMetrics) should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("AccumMetrics"), true, - platform::errors::NotFound( + phi::errors::NotFound( "PrecisionRecallOp Output(AccumMetrics) should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("AccumStatesInfo"), true, - platform::errors::NotFound( + phi::errors::NotFound( "PrecisionRecallOp Output(AccumStatesInfo) should not be null.")); int64_t cls_num = @@ -61,7 +61,7 @@ class PrecisionRecallOp : public framework::OperatorWithKernel { if (ctx->IsRuntime()) { 
PADDLE_ENFORCE_EQ(max_probs_dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Each instance of PrecisionRecallOp " "Input(MaxProbs) contains one max probability, " "the shape of Input(MaxProbs) should be " @@ -72,7 +72,7 @@ class PrecisionRecallOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->GetInputDim("Indices"), max_probs_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of PrecisionRecallOp Input(Indices) should be same " "with " "max_probs_dims. But received the shape of Input(Indices) is " @@ -84,7 +84,7 @@ class PrecisionRecallOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( max_probs_dims[0], labels_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 1st dimension of PrecisionRecallOp Input(MaxProbs) and " "Input(Labels) both should be batch_size" "But the 1st dimension we received max_probs_dims[0] = %d, " @@ -93,7 +93,7 @@ class PrecisionRecallOp : public framework::OperatorWithKernel { labels_dims[0])); PADDLE_ENFORCE_EQ(labels_dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 2nd dimension of PrecisionRecallOp " "Input(Labels) contains instance label and " "the shape should be equal to 1. But the 2nd " @@ -107,7 +107,7 @@ class PrecisionRecallOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( weights_dims, common::make_ddim({max_probs_dims[0], 1}), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of PrecisionRecallOp Input(Weights) should be " "[batch_size, 1]. But the shape we received is [%d, %d]", weights_dims[0], @@ -121,7 +121,7 @@ class PrecisionRecallOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( states_dims, common::make_ddim({cls_num, 4}), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of PrecisionRecallOp Input(StatesInfo) should be " "[class_number, 4]. 
But the shape we received is [%d, %d]", states_dims[0], diff --git a/paddle/fluid/operators/metrics/precision_recall_op.h b/paddle/fluid/operators/metrics/precision_recall_op.h index 6eef5658c5c00..8a276d2fa5a32 100644 --- a/paddle/fluid/operators/metrics/precision_recall_op.h +++ b/paddle/fluid/operators/metrics/precision_recall_op.h @@ -61,13 +61,13 @@ class PrecisionRecallKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( idx, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Class index of each instance should be " "greater than or equal to 0, But the index we received is %d", idx)); PADDLE_ENFORCE_LT(idx, cls_num, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Class index of each instance should be less than " "cls_num = %d, But the index we received is %d", cls_num, @@ -75,13 +75,13 @@ class PrecisionRecallKernel : public framework::OpKernel { PADDLE_ENFORCE_GE(label, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Label of each instance should be greater than or " "equal to 0, But the label we received is %d", label)); PADDLE_ENFORCE_LT(label, cls_num, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Label of each instance should be less than " "cls_num = %d, But the label we received is %d", cls_num, diff --git a/paddle/fluid/operators/minus_op.cc b/paddle/fluid/operators/minus_op.cc index 64bc176d97149..1726a8f818ec1 100644 --- a/paddle/fluid/operators/minus_op.cc +++ b/paddle/fluid/operators/minus_op.cc @@ -34,15 +34,15 @@ class MinusOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::NotFound("Input(X) of MinusOp is not found.")); + phi::errors::NotFound("Input(X) of MinusOp is not found.")); PADDLE_ENFORCE_EQ( ctx->HasInput("Y"), true, - platform::errors::NotFound("Input(Y) of MinusOp is not found.")); + phi::errors::NotFound("Input(Y) of MinusOp is not found.")); PADDLE_ENFORCE_EQ( 
ctx->HasOutput("Out"), true, - platform::errors::NotFound("Output(Out) of MinusOp is not found.")); + phi::errors::NotFound("Output(Out) of MinusOp is not found.")); auto x_dims = ctx->GetInputDim("X"); auto y_dims = ctx->GetInputDim("Y"); @@ -52,7 +52,7 @@ class MinusOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims, y_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Minus operator must take two tensor with same dim, but received " "input X dim is:[%s], Y dim is:[%s]", x_dims, diff --git a/paddle/fluid/operators/minus_op.h b/paddle/fluid/operators/minus_op.h index 8cc18fe0c97ec..2f900a2b16bc2 100644 --- a/paddle/fluid/operators/minus_op.h +++ b/paddle/fluid/operators/minus_op.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" +#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" namespace paddle { namespace operators { @@ -31,7 +31,7 @@ class MinusKernel : public framework::OpKernel { out_tensor->mutable_data(context.GetPlace()); auto& dev = *context.template device_context().eigen_device(); - EigenSub, T>::Eval( + phi::funcs::EigenSub, T>::Eval( dev, framework::EigenVector::Flatten(*out_tensor), framework::EigenVector::Flatten(*left_tensor), diff --git a/paddle/fluid/operators/miopen_rnn_cache.h b/paddle/fluid/operators/miopen_rnn_cache.h index 19255363259b5..2a8b38d38d577 100644 --- a/paddle/fluid/operators/miopen_rnn_cache.h +++ b/paddle/fluid/operators/miopen_rnn_cache.h @@ -92,10 +92,10 @@ struct CudnnRNNCache { const auto numDirections = is_bidirec_ ? 
2 : 1; - PADDLE_ENFORCE_EQ(miopen_type, - miopenFloat, - platform::errors::InvalidArgument( - "MIOPEN do not support double datatype.")); + PADDLE_ENFORCE_EQ( + miopen_type, + miopenFloat, + phi::errors::InvalidArgument("MIOPEN do not support double datatype.")); auto miopen_size = sizeof(float); x_desc_ = new miopenTensorDescriptor_t[seq_length_]; @@ -259,7 +259,7 @@ struct CudnnRNNCache { PADDLE_ENFORCE_EQ( weights_size_, miopen_size * weight_numel, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The miopen lstm and setting weight size should be same.")); int dim_w[3]; diff --git a/paddle/fluid/operators/modified_huber_loss_op.cc b/paddle/fluid/operators/modified_huber_loss_op.cc index c6d553865277e..4da376ce97487 100644 --- a/paddle/fluid/operators/modified_huber_loss_op.cc +++ b/paddle/fluid/operators/modified_huber_loss_op.cc @@ -33,16 +33,16 @@ class ModifiedHuberLossOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 2, - platform::errors::InvalidArgument("Input(input) rank should be 2, " - "but received input rank(%d) != 2", - x_dims.size())); + phi::errors::InvalidArgument("Input(input) rank should be 2, " + "but received input rank(%d) != 2", + x_dims.size())); if (ctx->IsRuntime() || (common::product(x_dims) > 0 && common::product(y_dims) > 0)) { PADDLE_ENFORCE_EQ( x_dims, y_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(input) and Input(label) should have the same " "shape, but received input shape [%s] != label shape [%s]", x_dims, @@ -52,7 +52,7 @@ class ModifiedHuberLossOp : public framework::OperatorWithKernel { if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ(x_dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(input) should be 1, " "but received second dimension of input (%d) != 1", x_dims[1])); @@ -123,7 +123,7 @@ class ModifiedHuberLossGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( 
intermediate_dims, y_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Intermediate variable which will be reused in " "backward processing should the same as " "the shape of Input(label), but received Intermediate variable " @@ -134,7 +134,7 @@ class ModifiedHuberLossGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( out_grad_dims, y_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of output gradient should be the same as " "the shape of Input(label), but received the output gradient " "shape [%s] != label shape [%s]", diff --git a/paddle/fluid/operators/modified_huber_loss_op.h b/paddle/fluid/operators/modified_huber_loss_op.h index 88cb91d454e72..d0fb4dd40a667 100644 --- a/paddle/fluid/operators/modified_huber_loss_op.h +++ b/paddle/fluid/operators/modified_huber_loss_op.h @@ -32,7 +32,7 @@ struct CheckLabelValue { PADDLE_ENFORCE_EQ( val == static_cast(0) || val == static_cast(1), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(label) value of modified_huber_loss_op expected to be 0 " "or 1, but got %ld. 
Please check label value.", val)); diff --git a/paddle/fluid/operators/nccl/nccl_op.cc b/paddle/fluid/operators/nccl/nccl_op.cc index c5a1097e2f157..dd3fd52d3b24d 100644 --- a/paddle/fluid/operators/nccl/nccl_op.cc +++ b/paddle/fluid/operators/nccl/nccl_op.cc @@ -34,13 +34,13 @@ class NCCLInitOp : public framework::OperatorBase { const platform::Place &place) const override { PADDLE_ENFORCE_NOT_NULL( scope.FindVar(Input(kParallelScopes)), - platform::errors::NotFound("Can not find variable '%s' in the scope.", - kParallelScopes)); + phi::errors::NotFound("Can not find variable '%s' in the scope.", + kParallelScopes)); const auto &name = Output("Communicator"); PADDLE_ENFORCE_NOT_NULL( scope.FindVar(name), - platform::errors::NotFound( - "Output(%s) is needed for ncclInit operator.", name)); + phi::errors::NotFound("Output(%s) is needed for ncclInit operator.", + name)); // A parallel do may not use all the gpus. For example, the batch size is 7 // in the last batch while we have 8 gpu. In this case, parallel_do will // create 7 parallel scopes, so should ncclInitOp create 7 gpu peers @@ -52,7 +52,7 @@ class NCCLInitOp : public framework::OperatorBase { } PADDLE_ENFORCE_EQ(!gpus.empty(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "gpus is empty, NCCL must init with gpus")); platform::Communicator *comm = @@ -104,11 +104,10 @@ class NCCLAllReduceOp : public framework::OperatorWithKernel { OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "NCCLAllReduce"); std::string reduction = ctx->Attrs().Get("reduction"); - PADDLE_ENFORCE_EQ( - (reduction == "ncclSum" || reduction == "ncclProd" || - reduction == "ncclMin" || reduction == "ncclMax"), - true, - platform::errors::InvalidArgument("invalid nccl reduction.")); + PADDLE_ENFORCE_EQ((reduction == "ncclSum" || reduction == "ncclProd" || + reduction == "ncclMin" || reduction == "ncclMax"), + true, + phi::errors::InvalidArgument("invalid nccl reduction.")); auto x_dims = 
ctx->GetInputsDim("X"); ctx->SetOutputsDim("Out", x_dims); @@ -150,11 +149,10 @@ class NCCLReduceOp : public framework::OperatorWithKernel { OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "NCCLReduce"); std::string reduction = ctx->Attrs().Get("reduction"); - PADDLE_ENFORCE_EQ( - (reduction == "ncclSum" || reduction == "ncclProd" || - reduction == "ncclMin" || reduction == "ncclMax"), - true, - platform::errors::InvalidArgument("invalid nccl reduction.")); + PADDLE_ENFORCE_EQ((reduction == "ncclSum" || reduction == "ncclProd" || + reduction == "ncclMin" || reduction == "ncclMax"), + true, + phi::errors::InvalidArgument("invalid nccl reduction.")); auto x_dims = ctx->GetInputsDim("X"); ctx->SetOutputsDim("Out", x_dims); @@ -201,10 +199,9 @@ class NCCLBcastOp : public framework::OperatorWithKernel { OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "NCCLBcast"); int root = ctx->Attrs().Get("root"); - PADDLE_ENFORCE_EQ( - root != platform::kInvalidGPUId, - true, - platform::errors::InvalidArgument("Bcast root must be set.")); + PADDLE_ENFORCE_EQ(root != platform::kInvalidGPUId, + true, + phi::errors::InvalidArgument("Bcast root must be set.")); auto x_dims = ctx->GetInputsDim("X"); ctx->SetOutputsDim("Out", x_dims); diff --git a/paddle/fluid/operators/nccl/nccl_op.cu.cc b/paddle/fluid/operators/nccl/nccl_op.cu.cc index abb24cc8cae10..f1d6073a37231 100644 --- a/paddle/fluid/operators/nccl/nccl_op.cu.cc +++ b/paddle/fluid/operators/nccl/nccl_op.cu.cc @@ -46,7 +46,7 @@ static ncclRedOp_t str_to_nccl_red_type(std::string reduction) { auto it = str_to_type.find(reduction); PADDLE_ENFORCE_EQ(it != str_to_type.end(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Invalid nccl reduction. 
Must be ncclMin | ncclMax | " "ncclProd | ncclSum")); return it->second; @@ -58,7 +58,7 @@ class NCCLAllReduceKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "This kernel only runs on GPU device.")); auto* x = ctx.Input("X"); auto* out = ctx.Output("Out"); @@ -91,10 +91,10 @@ template class NCCLReduceKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), - true, - platform::errors::InvalidArgument( - "This kernel only runs on GPU device.")); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), + true, + phi::errors::InvalidArgument("This kernel only runs on GPU device.")); auto x = ctx.Input("X"); // x0, x1, x2 auto out = ctx.Output("Out"); auto* comm = ctx.Input("Communicator"); @@ -132,10 +132,10 @@ template class NCCLBcastKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), - true, - platform::errors::InvalidArgument( - "This kernel only runs on GPU device.")); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), + true, + phi::errors::InvalidArgument("This kernel only runs on GPU device.")); int root = ctx.Attr("root"); auto* comm = ctx.Input("Communicator"); // device id diff --git a/paddle/fluid/operators/nce_op.cc b/paddle/fluid/operators/nce_op.cc index 1b622b7571667..ac260615969b4 100644 --- a/paddle/fluid/operators/nce_op.cc +++ b/paddle/fluid/operators/nce_op.cc @@ -45,7 +45,7 @@ class NCEOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims[0], label_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(Input) and Input(Label) 
should be " "equal in runtime. But received: Input(Input)'s shape = [%s] " "with 1st dim = %d, Input(Label)'s shape = [%s] with 1st dim = " @@ -61,7 +61,7 @@ class NCEOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->GetInputDim("Weight")[0], ctx->GetInputDim("Bias")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(Weight) and Input(Bias) " "should be equal. But received: Input(Weight)'s shape = [%s] " "with 1st dim = %d, and Input(Bias)'s shape = [%s] with 1st dim " @@ -78,7 +78,7 @@ class NCEOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( num_total_classes, ctx->GetInputDim("Weight")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of total classes should be equal to the first " "dimension of Input(Weight). But received: Attr(num_total_classes) " "= %d, Input(Weight)'s shape = [%s] with 1st dim = %d.", @@ -89,7 +89,7 @@ class NCEOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( custom_neg_classes.size(), static_cast(num_neg_samples), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of Attr(custom_neg_classes) should be equal " "to the number of negative samples. But received: " "custom_neg_classes.size() = %d, num_neg_samples = %d.", diff --git a/paddle/fluid/operators/nce_op.h b/paddle/fluid/operators/nce_op.h index 41262dca6e53c..25a970a5fa6da 100644 --- a/paddle/fluid/operators/nce_op.h +++ b/paddle/fluid/operators/nce_op.h @@ -104,7 +104,7 @@ class NCEKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dist_probs->numel(), num_total_classes, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The number of elements in Input(CustomDistProbs) " "should be equal to the number of total classes. 
But Received: " "Input(CustomDistProbs).numel() = %d, Attr(num_total_classes) " @@ -114,7 +114,7 @@ class NCEKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dist_alias->numel(), num_total_classes, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The number of elements in Input(CustomDistAlias) " "should be equal to the number of total classes. But Received: " "Input(CustomDistAlias).numel() = %d, Attr(num_total_classes) " @@ -124,7 +124,7 @@ class NCEKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dist_alias_probs->numel(), num_total_classes, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The number of elements in " "Input(CustomDistAliasProbs) " "should be equal to the number of total classes. But Received: " @@ -144,7 +144,7 @@ class NCEKernel : public framework::OpKernel { break; } default: { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Unsupported SamplerType. SamplerType should be 0: Uniform, " "1: LogUniform or 2: CustomDist. Received SamplerType: %d", sampler_type)); @@ -180,7 +180,7 @@ class NCEKernel : public framework::OpKernel { for (int x = 0; x < sample_labels->numel(); x++) { PADDLE_ENFORCE_GE(sample_labels_data[x], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ValueError: Every sample label should be " "non-negative. But received: " "Input(SampleLabels)[%d] = %d", @@ -290,7 +290,7 @@ class NCEGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dist_probs->numel(), num_total_classes, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The number of elements in Input(CustomDistProbs) " "should be equal to the number of total classes. 
But Received: " "Input(CustomDistProbs).numel() = %d, Attr(num_total_classes) " @@ -300,7 +300,7 @@ class NCEGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dist_alias->numel(), num_total_classes, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The number of elements in Input(CustomDistAlias) " "should be equal to the number of total classes. But Received: " "Input(CustomDistAlias).numel() = %d, Attr(num_total_classes) " @@ -310,7 +310,7 @@ class NCEGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dist_alias_probs->numel(), num_total_classes, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The number of elements in " "Input(CustomDistAliasProbs) " "should be equal to the number of total classes. But Received: " @@ -330,7 +330,7 @@ class NCEGradKernel : public framework::OpKernel { break; } default: { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Unsupported SamplerType. SamplerType should be 0: Uniform, " "1: LogUniform or 2: CustomDist. 
Received SamplerType: %d", sampler_type)); @@ -399,7 +399,7 @@ class NCEGradKernel : public framework::OpKernel { auto *table_t = context.Input("Weight"); table_dim = table_t->value().dims(); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "The parameter Weight of a NCE_OP " "must be either phi::DenseTensor or SelectedRows")); } diff --git a/paddle/fluid/operators/number_count_op.cc b/paddle/fluid/operators/number_count_op.cc index a67d6455bcf5f..7fb293891d3a5 100644 --- a/paddle/fluid/operators/number_count_op.cc +++ b/paddle/fluid/operators/number_count_op.cc @@ -35,7 +35,7 @@ class NumberCountOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(number_dtype, framework::proto::VarType::INT64, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dtype of the number_dtype should be int64")); return phi::KernelKey(number_dtype, ctx.GetPlace()); } diff --git a/paddle/fluid/operators/onednn/interpolate_onednn_op.cc b/paddle/fluid/operators/onednn/interpolate_onednn_op.cc index 34e9679b29bb6..eff574b5a577b 100644 --- a/paddle/fluid/operators/onednn/interpolate_onednn_op.cc +++ b/paddle/fluid/operators/onednn/interpolate_onednn_op.cc @@ -115,9 +115,8 @@ class InterpolateOneDNNKernel : public framework::OpKernel { std::all_of( out_dims.begin(), out_dims.end(), [](int i) { return i > 0; }), 0, - platform::errors::InvalidArgument( - "out_d, out_h, out_w of Op(interpolate) " - "should be greater than 0.")); + phi::errors::InvalidArgument("out_d, out_h, out_w of Op(interpolate) " + "should be greater than 0.")); const std::vector nc_dims = {in_dims[0], in_dims[1]}; out_dims.insert(out_dims.begin(), nc_dims.begin(), nc_dims.end()); diff --git a/paddle/fluid/operators/onednn/lrn_onednn_op.cc b/paddle/fluid/operators/onednn/lrn_onednn_op.cc index 7b22d5d3c6ff0..77d76add0174e 100644 --- a/paddle/fluid/operators/onednn/lrn_onednn_op.cc +++ b/paddle/fluid/operators/onednn/lrn_onednn_op.cc @@ 
-69,7 +69,7 @@ class LRNOneDNNHandler PADDLE_ENFORCE_EQ( ctx.Attr("is_test"), false, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "is_test attribute should be set to False in training phase.")); const int n = ctx.Attr("n"); @@ -123,11 +123,11 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel { PADDLE_ENFORCE_EQ( is_float_type, true, - platform::errors::PreconditionNotMet("DNNL LRN must use float data.")); - PADDLE_ENFORCE_EQ(platform::is_cpu_place(ctx.GetPlace()), - true, - paddle::platform::errors::PreconditionNotMet( - "Operator DNNL LRN must use CPUPlace")); + phi::errors::PreconditionNotMet("DNNL LRN must use float data.")); + PADDLE_ENFORCE_EQ( + platform::is_cpu_place(ctx.GetPlace()), + true, + phi::errors::PreconditionNotMet("Operator DNNL LRN must use CPUPlace")); auto& dev_ctx = ctx.template device_context(); const auto& onednn_engine = dev_ctx.GetEngine(); @@ -169,11 +169,11 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel { const bool is_float_type = std::is_same::value; PADDLE_ENFORCE_EQ(is_float_type, true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "DNNL LRN GradOpKernel must use float data.")); PADDLE_ENFORCE_EQ(platform::is_cpu_place(ctx.GetPlace()), true, - paddle::platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Operator DNNL LRNGrad must use CPUPlace")); auto in_x = ctx.Input("X"); diff --git a/paddle/fluid/operators/onednn/matmul_onednn_op.cc b/paddle/fluid/operators/onednn/matmul_onednn_op.cc index 80af1b00b743c..b501cec806069 100644 --- a/paddle/fluid/operators/onednn/matmul_onednn_op.cc +++ b/paddle/fluid/operators/onednn/matmul_onednn_op.cc @@ -400,15 +400,15 @@ class MatMulMKLDNNKernel : public paddle::framework::OpKernel { trans_y, out); } else if (is_bfloat16) { - ExecuteMatMulV1(ctx, - onednn_engine, - x, - x_bd_dims, - trans_x, - y, - y_bd_dims, - trans_y, - out); + ExecuteMatMulV1(ctx, + onednn_engine, + x, + 
x_bd_dims, + trans_x, + y, + y_bd_dims, + trans_y, + out); } else { ExecuteMatMulV1(ctx, onednn_engine, @@ -661,7 +661,7 @@ REGISTER_OP_KERNEL(matmul, MKLDNN, ::phi::CPUPlace, MatMulMKLDNNKernel, - MatMulMKLDNNKernel, + MatMulMKLDNNKernel, MatMulMKLDNNKernel, MatMulMKLDNNKernel); @@ -669,4 +669,4 @@ REGISTER_OP_KERNEL(matmul_grad, MKLDNN, ::phi::CPUPlace, MatMulGradMKLDNNKernel, - MatMulGradMKLDNNKernel); + MatMulGradMKLDNNKernel); diff --git a/paddle/fluid/operators/onednn/quantize_onednn_op.cc b/paddle/fluid/operators/onednn/quantize_onednn_op.cc index 9b1cff1008677..3ad56469c922c 100644 --- a/paddle/fluid/operators/onednn/quantize_onednn_op.cc +++ b/paddle/fluid/operators/onednn/quantize_onednn_op.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #include "paddle/phi/backends/onednn/onednn_reuse.h" namespace paddle { @@ -39,10 +39,10 @@ class QuantOpKernel : public framework::OpKernel { PADDLE_ENFORCE_NE(quantization_scale, 0.0f, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Quantization scale must be different than 0.0f")); PADDLE_ENFORCE(quantization_shift <= 255 && quantization_shift >= 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Quantization shift must be lower or equal to ", "255 and greater or equal to 0, but got %f", quantization_shift)); diff --git a/paddle/fluid/operators/onednn/requantize_onednn_op.cc b/paddle/fluid/operators/onednn/requantize_onednn_op.cc index f467a9c57a8ca..2d277625dc34d 100644 --- a/paddle/fluid/operators/onednn/requantize_onednn_op.cc +++ b/paddle/fluid/operators/onednn/requantize_onednn_op.cc @@ -47,17 +47,17 @@ class ReQuantOpKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( scale_in, 0.0f, - platform::errors::InvalidArgument("Scale of input cannot be 0.0")); + 
phi::errors::InvalidArgument("Scale of input cannot be 0.0")); PADDLE_ENFORCE_NE( scale_out, 0.0f, - platform::errors::InvalidArgument("Scale of output cannot be 0.0")); + phi::errors::InvalidArgument("Scale of output cannot be 0.0")); if (shift_in != 0) { PADDLE_ENFORCE_EQ( input->dtype(), DataType::UINT8, - platform::errors::Unimplemented("Requantize does not support nonzero " - "shift for signed input.")); + phi::errors::Unimplemented("Requantize does not support nonzero " + "shift for signed input.")); } auto& dev_ctx = ctx.template device_context(); @@ -140,4 +140,4 @@ PD_REGISTER_STRUCT_KERNEL(requantize, ops::ReQuantOpKernel, int8_t, uint8_t, - paddle::platform::bfloat16) {} + phi::dtype::bfloat16) {} diff --git a/paddle/fluid/operators/onednn/reshape_onednn_op.cc b/paddle/fluid/operators/onednn/reshape_onednn_op.cc index 8632160b04ae0..7dba03ca6a799 100644 --- a/paddle/fluid/operators/onednn/reshape_onednn_op.cc +++ b/paddle/fluid/operators/onednn/reshape_onednn_op.cc @@ -37,7 +37,7 @@ static std::vector extract_shape( PADDLE_ENFORCE_EQ( tensor->dims(), common::make_ddim({1}), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "If the element type of 'shape' in ReshapeOp is phi::DenseTensor, " "the element's shape must be [1]. But received the element's shape " "is [%s]", @@ -104,7 +104,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { break; case ReshapeKernelOpName::flatten: default: - PADDLE_THROW(paddle::platform::errors::OutOfRange( + PADDLE_THROW(phi::errors::OutOfRange( "Reshape kernel doesn not support that operator name")); } } @@ -180,7 +180,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( unk_dim_idx, -1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only one dimension value of 'shape' in ReshapeOp can " "be -1. 
But received shape = [%s], shape[%d] is also -1.", common::make_ddim(shape), @@ -190,7 +190,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { PADDLE_ENFORCE_LT( static_cast(i), in_dims.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The index of 0 in `shape` must be less than " "the input tensor X's dimensions. " "But received shape = [%s], shape[%d] = 0, X's shape = [%s], " @@ -203,7 +203,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( shape[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Each dimension value of 'shape' in ReshapeOp must not " "be negative except one unknown dimension. " "But received shape = [%s], shape[%d] = %d.", @@ -227,7 +227,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( output_shape[unk_dim_idx] * capacity, -in_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 'shape' attribute in ReshapeOp is invalid. " "The input tensor X'size must be divisible by known " "capacity of 'shape'. " @@ -245,7 +245,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( capacity, in_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 'shape' in ReshapeOp is invalid. " "The input tensor X'size must be equal to the capacity of " "'shape'. 
" @@ -319,7 +319,7 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel { InferShapeFlattenGradOp(ctx, x_dims); break; default: - PADDLE_THROW(paddle::platform::errors::OutOfRange( + PADDLE_THROW(phi::errors::OutOfRange( "Reshape grad kernel doesn not support that operator name")); } } @@ -345,7 +345,7 @@ REGISTER_OP_KERNEL( MKLDNN, phi::CPUPlace, ops::ReshapeMKLDNNKernel, - ops::ReshapeMKLDNNKernel); REGISTER_OP_KERNEL( @@ -353,7 +353,7 @@ REGISTER_OP_KERNEL( MKLDNN, phi::CPUPlace, ops::ReshapeGradMKLDNNKernel, - ops::ReshapeGradMKLDNNKernel); REGISTER_OP_KERNEL( @@ -361,7 +361,7 @@ REGISTER_OP_KERNEL( MKLDNN, phi::CPUPlace, ops::ReshapeMKLDNNKernel, - ops::ReshapeMKLDNNKernel); REGISTER_OP_KERNEL( @@ -369,7 +369,7 @@ REGISTER_OP_KERNEL( MKLDNN, phi::CPUPlace, ops::ReshapeGradMKLDNNKernel, - ops::ReshapeGradMKLDNNKernel); REGISTER_OP_KERNEL( @@ -377,7 +377,7 @@ REGISTER_OP_KERNEL( MKLDNN, phi::CPUPlace, ops::ReshapeMKLDNNKernel, - ops::ReshapeMKLDNNKernel); REGISTER_OP_KERNEL( @@ -385,5 +385,5 @@ REGISTER_OP_KERNEL( MKLDNN, phi::CPUPlace, ops::ReshapeGradMKLDNNKernel, - ops::ReshapeGradMKLDNNKernel); diff --git a/paddle/fluid/operators/onednn/shuffle_channel_onednn_op.cc b/paddle/fluid/operators/onednn/shuffle_channel_onednn_op.cc index 6f656c5f1a2a1..19396cbe489ce 100644 --- a/paddle/fluid/operators/onednn/shuffle_channel_onednn_op.cc +++ b/paddle/fluid/operators/onednn/shuffle_channel_onednn_op.cc @@ -75,4 +75,4 @@ REGISTER_OP_KERNEL(shuffle_channel, MKLDNN, phi::CPUPlace, ops::ShuffleChannelMKLDNNKernel, - ops::ShuffleChannelMKLDNNKernel); + ops::ShuffleChannelMKLDNNKernel); diff --git a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc index 6c64c6a1f72ff..23441206a55c1 100644 --- a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc +++ b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc @@ -36,14 +36,14 @@ class DecayedAdagradOp : public framework::OperatorWithKernel { 
"DecayedAdagradOp"); PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Param").front(), framework::proto::VarType::LOD_TENSOR, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input var's type should be phi::DenseTensor, " "but the received is %s", ctx->Inputs("Param").front(), ctx->GetInputsVarType("Param").front())); PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Grad").front(), framework::proto::VarType::LOD_TENSOR, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input var's type should be phi::DenseTensor, " "but the received is %s", ctx->Inputs("Grad").front(), @@ -57,26 +57,26 @@ class DecayedAdagradOp : public framework::OperatorWithKernel { auto lr_dims = ctx->GetInputDim("LearningRate"); PADDLE_ENFORCE_NE(common::product(lr_dims), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Maybe the Input variable LearningRate has not " "been initialized. You may need to confirm " "if you put exe.run(startup_program) " "after optimizer.minimize function.")); - PADDLE_ENFORCE_EQ(common::product(lr_dims), - 1, - platform::errors::InvalidArgument( - "LearningRate should have one element")); + PADDLE_ENFORCE_EQ( + common::product(lr_dims), + 1, + phi::errors::InvalidArgument("LearningRate should have one element")); auto param_dims = ctx->GetInputDim("Param"); PADDLE_ENFORCE_EQ( param_dims, ctx->GetInputDim("Grad"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Param and Grad input of DecayedAdagradOp should have " "the same dimension.")); PADDLE_ENFORCE_EQ( param_dims, ctx->GetInputDim("Moment"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Param and Moment input of DecayedAdagradOp should have " "the same dimension.")); diff --git a/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cu b/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cu index b51d12c003e38..a54aebc3eba5e 100644 --- 
a/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cu +++ b/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cu @@ -60,7 +60,7 @@ static void CheckCommContextHasRingId( const distributed::CommContextManager &comm_context_manager, int ring_id) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -1773,7 +1773,7 @@ void DistributedFusedLambKernel( comm_context_manager.Get(std::to_string(ring_ids[0]))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - paddle::platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); diff --git a/paddle/fluid/operators/optimizers/dpsgd_op.cc b/paddle/fluid/operators/optimizers/dpsgd_op.cc index d8762b8bd719a..4c5b7bb369ad8 100644 --- a/paddle/fluid/operators/optimizers/dpsgd_op.cc +++ b/paddle/fluid/operators/optimizers/dpsgd_op.cc @@ -22,41 +22,41 @@ class DpsgdOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("Param"), - true, - platform::errors::NotFound( - "Input(Param) of DpsgdOp should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("Grad"), - true, - platform::errors::NotFound( - "Input(Grad) of DpsgdOp should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("Param"), + true, + phi::errors::NotFound("Input(Param) of DpsgdOp should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("Grad"), + true, + phi::errors::NotFound("Input(Grad) of DpsgdOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("LearningRate"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(LearningRate) of DpsgdOp should not be null.")); 
PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Param").front(), framework::proto::VarType::LOD_TENSOR, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input var's type should be phi::DenseTensor, " "but the received is %s", ctx->GetInputsVarType("Param").front())); PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Grad").front(), framework::proto::VarType::LOD_TENSOR, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input var's type should be phi::DenseTensor, " "but the received is %s", ctx->GetInputsVarType("Grad").front())); PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(ParamOut) of DpsgdOp should not be null.")); auto lr_dims = ctx->GetInputDim("LearningRate"); PADDLE_ENFORCE_EQ(common::product(lr_dims), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Learning rate should have 1 dimension. But Received " "LearningRate's dims [%s].", common::product(lr_dims))); @@ -64,7 +64,7 @@ class DpsgdOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( param_dims, ctx->GetInputDim("Grad"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Param and Grad input of DpsgdOp should have same dimension. 
But " "received Para's dim [%s] and Grad's dim [%s].", param_dims, diff --git a/paddle/fluid/operators/optimizers/dpsgd_op.h b/paddle/fluid/operators/optimizers/dpsgd_op.h index 0f2980ff368f4..427dc15f74638 100644 --- a/paddle/fluid/operators/optimizers/dpsgd_op.h +++ b/paddle/fluid/operators/optimizers/dpsgd_op.h @@ -31,7 +31,7 @@ class DpsgdOpKernel : public framework::OpKernel { const auto *param_var = ctx.InputVar("Param"); PADDLE_ENFORCE_EQ(param_var->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Var(%s)'s type should be phi::DenseTensor, " "but the received is %s", ctx.InputNames("Param").front(), @@ -40,7 +40,7 @@ class DpsgdOpKernel : public framework::OpKernel { const auto *grad_var = ctx.InputVar("Grad"); PADDLE_ENFORCE_EQ(grad_var->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Var(%s)'s type should be phi::DenseTensor, " "but the received is %s", ctx.InputNames("Grad").front(), @@ -56,12 +56,12 @@ class DpsgdOpKernel : public framework::OpKernel { auto sz = param_out->numel(); PADDLE_ENFORCE_EQ(param->numel(), sz, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input parameter's number of elements is error, " "expected %zu, but received %zu.")); PADDLE_ENFORCE_EQ(grad->numel(), sz, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input gradient's number of elements is error, " "expected %zu, but received %zu.")); diff --git a/paddle/fluid/operators/optimizers/ftrl_op.cc b/paddle/fluid/operators/optimizers/ftrl_op.cc index e6eadadc17b6c..37edf5b8f8aa8 100644 --- a/paddle/fluid/operators/optimizers/ftrl_op.cc +++ b/paddle/fluid/operators/optimizers/ftrl_op.cc @@ -45,7 +45,7 @@ class FTRLOp : public framework::OperatorWithKernel { auto param_dim = ctx->GetInputDim("Param"); PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("Grad"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Two input of FTRL Op's dimension 
must be same, but " "param_dim is %d, Grad is %d", param_dim, @@ -54,14 +54,14 @@ class FTRLOp : public framework::OperatorWithKernel { auto lr_dim = ctx->GetInputDim("LearningRate"); PADDLE_ENFORCE_NE(common::product(lr_dim), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Maybe the Input variable LearningRate has not " "been initialized. You may need to confirm " "if you put exe.run(startup_program) " "after optimizer.minimize function.")); PADDLE_ENFORCE_EQ(common::product(lr_dim), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Learning Rate should be a scalar, but got %d", common::product(lr_dim))); diff --git a/paddle/fluid/operators/optimizers/ftrl_op.h b/paddle/fluid/operators/optimizers/ftrl_op.h index d563b84b8d5c6..347dcbafa38d5 100644 --- a/paddle/fluid/operators/optimizers/ftrl_op.h +++ b/paddle/fluid/operators/optimizers/ftrl_op.h @@ -221,8 +221,8 @@ class FTRLOpKernel : public framework::OpKernel { lin_accum_out->mutable_data(ctx.GetPlace())); for_range(functor); } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "Unsupported Variable Type of Grad")); + PADDLE_THROW( + phi::errors::InvalidArgument("Unsupported Variable Type of Grad")); } } }; diff --git a/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.cc b/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.cc index 0c5a9721e279b..7923bea20982b 100644 --- a/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.cc +++ b/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.cc @@ -13,7 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/optimizers/proximal_gd_op.cc b/paddle/fluid/operators/optimizers/proximal_gd_op.cc index 074cc26c994e3..bc842d03a3c44 100644 --- a/paddle/fluid/operators/optimizers/proximal_gd_op.cc +++ b/paddle/fluid/operators/optimizers/proximal_gd_op.cc @@ -34,7 +34,7 @@ class ProximalGDOp : public framework::OperatorWithKernel { auto param_dim = ctx->GetInputDim("Param"); PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("Grad"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Intput(Param) should be equal to the " "Input(Grad) of ProximalGD Op. But received " "Input(Param).dimensions=[%s], " @@ -46,7 +46,7 @@ class ProximalGDOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( common::product(lr_dim), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Learning Rate should be a scalar. 
But received dimensions:[%s]", lr_dim)); diff --git a/paddle/fluid/operators/optimizers/sgd_op.cu b/paddle/fluid/operators/optimizers/sgd_op.cu index f6d2435590f9e..a489454ff12a9 100644 --- a/paddle/fluid/operators/optimizers/sgd_op.cu +++ b/paddle/fluid/operators/optimizers/sgd_op.cu @@ -71,7 +71,7 @@ class SGDOpKernel : public framework::OpKernel { const auto* param_var = ctx.InputVar("Param"); PADDLE_ENFORCE_EQ(param_var->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Var(%s)'s type should be phi::DenseTensor, " "but the received is %s", ctx.InputNames("Param").front(), @@ -93,7 +93,7 @@ class SGDOpKernel : public framework::OpKernel { ctx.HasInput("MasterParam") && ctx.HasOutput("MasterParamOut"); PADDLE_ENFORCE_EQ(has_master, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(MasterParam) and Output(MasterParamOut) " "should not be null when " "the attr `multi_precision` is true")); @@ -131,7 +131,7 @@ class SGDOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( param, param_out, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor Param of SgdOp should be equal with ParamOut " "if variable's type is SelectedRows.")); auto* grad = ctx.Input("Grad"); @@ -140,7 +140,7 @@ class SGDOpKernel : public framework::OpKernel { auto out_dims = param_out->dims(); PADDLE_ENFORCE_EQ(in_height, out_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor Grad's height of SgdOp should be " "equal with ParamOut's dims. 
But received Grad's " "height [%s] and ParamOut's dims [%s]", @@ -153,7 +153,7 @@ class SGDOpKernel : public framework::OpKernel { int64_t in_row_numel = in_value.numel() / in_rows.size(); PADDLE_ENFORCE_EQ(in_row_numel, param_out->numel() / in_height, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The in_row_numel of SgdOp should be equal with " "param_out's numel / in_height.")); @@ -179,7 +179,7 @@ class SGDOpKernel : public framework::OpKernel { } else { PADDLE_ENFORCE_EQ(false, true, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "Unsupported Variable Type of Grad " "in SgdOp. Excepted LodTensor or " "SelectedRows, But received [%s]", diff --git a/paddle/fluid/operators/optimizers/sgd_op.h b/paddle/fluid/operators/optimizers/sgd_op.h index 66ba8de469fee..ced04109e10bc 100644 --- a/paddle/fluid/operators/optimizers/sgd_op.h +++ b/paddle/fluid/operators/optimizers/sgd_op.h @@ -18,7 +18,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/selected_rows_utils.h" #include "paddle/fluid/framework/var_type_traits.h" -#include "paddle/fluid/platform/bfloat16.h" +#include "paddle/phi/common/bfloat16.h" #include "paddle/phi/kernels/funcs/jit/kernels.h" namespace paddle { @@ -92,7 +92,7 @@ struct sgd_dense_param_kernel -struct sgd_dense_param_kernel::kId> { void operator()(const framework::ExecutionContext &ctx) const { VLOG(4) << "[CPU]: sgd_dense_param_kernel"; @@ -100,12 +100,12 @@ struct sgd_dense_param_kernel("Param"); auto *param_out = ctx.Output("ParamOut"); const auto *grad = ctx.Input("Grad"); - param_out->mutable_data(ctx.GetPlace()); + param_out->mutable_data(ctx.GetPlace()); - auto p = framework::EigenVector::Flatten(*param); - auto g = framework::EigenVector::Flatten(*grad); - auto o = framework::EigenVector::Flatten(*param_out); - const auto *lr = learning_rate->data(); + auto p = framework::EigenVector::Flatten(*param); + auto g = 
framework::EigenVector::Flatten(*grad); + auto o = framework::EigenVector::Flatten(*param_out); + const auto *lr = learning_rate->data(); o = p - lr[0] * g; } @@ -113,7 +113,7 @@ struct sgd_dense_param_kernel -struct sgd_dense_param_kernel::kId> { void operator()(const framework::ExecutionContext &ctx) const { VLOG(4) << "[CPU]: sgd_dense_param_kernel"; @@ -127,15 +127,15 @@ struct sgd_dense_param_kernel(grad_rows.size()); const auto grad_width = grad_value.numel() / grad_val_height; - const auto *grad_data = grad_value.data(); - auto *out_data = param_out->data(); - const auto *lr = learning_rate->data(); + const auto *grad_data = grad_value.data(); + auto *out_data = param_out->data(); + const auto *lr = learning_rate->data(); for (size_t i = 0; i < grad_rows.size(); ++i) { PADDLE_ENFORCE_LT( grad_rows[i], grad_height, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "Grad rows index value should be less than grad height." "Got [%s], but expected less than [%s]", grad_rows[i], @@ -170,7 +170,7 @@ class SGDOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( false, true, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "Unsupported Variable Type of Parameter in SgdOp. Excepted " "LodTensor or SelectedRows, But received [%s]", paddle::framework::ToTypeName(param_var->Type()))); @@ -188,22 +188,22 @@ class SGDOpKernel : public framework::OpKernel { const auto sz = param_out->numel(); PADDLE_ENFORCE_EQ(param->numel(), sz, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor Param's numel of SgdOp " "should be equal with ParamOut's numel. " "But received Param's " "numel = [%s], ParamOut's numel = [%s]", param->numel(), sz)); - PADDLE_ENFORCE_EQ(grad->numel(), - sz, - platform::errors::InvalidArgument( - "The input tensor Grad's numel of SgdOp " - "should be equal with ParamOut's numel. 
" - "But received Grad's " - "numel = [%s], ParamOut's numel = [%s]", - grad->numel(), - sz)); + PADDLE_ENFORCE_EQ( + grad->numel(), + sz, + phi::errors::InvalidArgument("The input tensor Grad's numel of SgdOp " + "should be equal with ParamOut's numel. " + "But received Grad's " + "numel = [%s], ParamOut's numel = [%s]", + grad->numel(), + sz)); dense_param_and_grad_kernel(ctx); } else if (grad_var->IsType()) { @@ -212,7 +212,7 @@ class SGDOpKernel : public framework::OpKernel { // It's better to find a more elegant solution. PADDLE_ENFORCE_EQ(param, param_out, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor Param of SgdOp " "should be equal with ParamOut if variable's " "type is SelectedRows. ")); @@ -228,7 +228,7 @@ class SGDOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( grad->height(), out_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor Grad's height of SgdOp " "should be equal with ParamOut's dims. But received Grad's " "height [%s] and ParamOut's dims [%s]", @@ -246,7 +246,7 @@ class SGDOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( grad_width, param_width, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The grad_value's numel of SgdOp " "should be equal with param_out's numel. But received " "grad_value's numel [%s] and param_out's numel [%s]", @@ -258,7 +258,7 @@ class SGDOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( false, true, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "Unsupported Variable Type of Grad in SgdOp. 
Excepted " "LodTensor or SelectedRows, But received [%s]", paddle::framework::ToTypeName(grad_var->Type()))); @@ -273,7 +273,7 @@ class SGDOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(grad_var->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "When param is SelectedRows, gradient should also " "be SelectedRows")); const auto ¶m = param_var->Get(); @@ -291,7 +291,7 @@ class SGDOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( param_row_width, grad_row_width, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The param_row in SgdOP should have the same size with grad_row. " "But received param_row's width is [%s], and grad_row's width is " "[%s]", @@ -306,7 +306,7 @@ class SGDOpKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( id_index, static_cast(0), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The id in SgdOp should be >= 0. But received id_index is [%s]", id_index)); for (int64_t j = 0; j < grad_row_width; j++) { diff --git a/paddle/fluid/operators/optimizers/sparse_momentum_op.cc b/paddle/fluid/operators/optimizers/sparse_momentum_op.cc index c9f9181664e51..7ef426cedad19 100644 --- a/paddle/fluid/operators/optimizers/sparse_momentum_op.cc +++ b/paddle/fluid/operators/optimizers/sparse_momentum_op.cc @@ -25,7 +25,7 @@ class SparseMomentumOpInferVarType : public framework::VarTypeInference { auto in_var_type = ctx->GetInputType("Param"); PADDLE_ENFORCE_EQ(in_var_type == framework::proto::VarType::LOD_TENSOR, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only support LodTensor, Unexpected Input Type.")); ctx->SetOutputType("ParamOut", in_var_type, framework::ALL_ELEMENTS); diff --git a/paddle/fluid/operators/optimizers/sparse_momentum_op.cu b/paddle/fluid/operators/optimizers/sparse_momentum_op.cu index a0df85e1453da..0a98ee4b3e5de 100644 --- a/paddle/fluid/operators/optimizers/sparse_momentum_op.cu +++ 
b/paddle/fluid/operators/optimizers/sparse_momentum_op.cu @@ -14,7 +14,7 @@ #include "paddle/fluid/operators/optimizers/sparse_momentum_op.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace ops = paddle::operators; namespace plat = paddle::platform; @@ -24,4 +24,4 @@ PD_REGISTER_STRUCT_KERNEL(sparse_momentum, ops::SparseMomentumOpKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/optimizers/sparse_momentum_op.h b/paddle/fluid/operators/optimizers/sparse_momentum_op.h index 4c47fd2b62178..6f1a9712115af 100644 --- a/paddle/fluid/operators/optimizers/sparse_momentum_op.h +++ b/paddle/fluid/operators/optimizers/sparse_momentum_op.h @@ -21,9 +21,9 @@ #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/for_range.h" #include "paddle/phi/common/amp_type_traits.h" +#include "paddle/phi/common/float16.h" #ifdef __NVCC__ #include "cub/cub.cuh" @@ -154,7 +154,7 @@ class SparseMomentumOp : public framework::OperatorWithKernel { auto lr_dims = common::product(ctx->GetInputDim("LearningRate")); PADDLE_ENFORCE_EQ(lr_dims != 0 && lr_dims == 1, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Learning_rate should be a scalar. But Received " "LearningRate's dim [%s]", lr_dims)); @@ -163,7 +163,7 @@ class SparseMomentumOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( param_dim, ctx->GetInputDim("Velocity"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Param and Velocity of SparseMomentumOp should have the same " "dimension. 
But received Param's dim [%s] and Velocity [%s].", param_dim, @@ -384,8 +384,8 @@ class SparseMomentumOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( axis == 0 || axis == 1, true, - platform::errors::InvalidArgument("The axis of sparse_momentum_op only " - "support axis=0 or axis=1 now.")); + phi::errors::InvalidArgument("The axis of sparse_momentum_op only " + "support axis=0 or axis=1 now.")); auto learning_rate = ctx.Input("LearningRate"); auto param = ctx.Input("Param"); @@ -400,13 +400,13 @@ class SparseMomentumOpKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( index->dims()[0], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The index of sparse_momentum_op should not be empty" "when the index's rank is 1.")); } else if (index->dims().size() == 2) { PADDLE_ENFORCE_EQ(index->dims()[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "If the index's rank of sparse_momentum_op is 2," " the second dimension should be 1.")); } @@ -418,7 +418,7 @@ class SparseMomentumOpKernel : public framework::OpKernel { ctx.HasInput("MasterParam") && ctx.HasOutput("MasterParamOut"); PADDLE_ENFORCE_EQ(has_master, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(MasterParam) and Output(MasterParamOut) " "should not be null when " "the attr `multi_precision` is true")); @@ -443,16 +443,16 @@ class SparseMomentumOpKernel : public framework::OpKernel { auto param_dims = param->dims(); auto grad_dims = grad->dims(); - PADDLE_ENFORCE_EQ(param_dims.size(), - 2, - platform::errors::InvalidArgument( - "The Param's rank of sparse_momentum_op" - " must be 2 now.")); - PADDLE_ENFORCE_EQ(grad_dims.size(), - 2, - platform::errors::InvalidArgument( - "The Grad's rank of sparse_momentum_op" - " must be 2 now.")); + PADDLE_ENFORCE_EQ( + param_dims.size(), + 2, + phi::errors::InvalidArgument("The Param's rank of sparse_momentum_op" + " must be 2 now.")); + PADDLE_ENFORCE_EQ( + grad_dims.size(), 
+ 2, + phi::errors::InvalidArgument("The Grad's rank of sparse_momentum_op" + " must be 2 now.")); phi::DenseTensor sorted_index, grad_index, sort_value; auto sorted_index_ptr = @@ -511,7 +511,7 @@ class SparseMomentumOpKernel : public framework::OpKernel { grad_index_ptr[i] = vec_tosort[i].second; } } else { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "sparse_momentum %s is not supported.", ctx.GetPlace())); } diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc index 6529bbc29fcfe..618b8daaf6255 100644 --- a/paddle/fluid/operators/pad2d_op.cc +++ b/paddle/fluid/operators/pad2d_op.cc @@ -413,7 +413,7 @@ class Pad2dOp : public framework::OperatorWithKernel { auto x_dim = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ(x_dim.size(), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of Input(X)'s dimension should be equal to " "4, but received %d. ", x_dim.size())); @@ -425,14 +425,14 @@ class Pad2dOp : public framework::OperatorWithKernel { auto paddings_dim = ctx->GetInputDim("Paddings"); PADDLE_ENFORCE_EQ(paddings_dim.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Size of Input(Paddings)'s dimension should be " "equal to 1, but received %d.", paddings_dim.size())); if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ(paddings_dim[0], 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Shape of Input(Paddings) should be equal to " "[4], but received [%d].", paddings_dim[0])); @@ -445,7 +445,7 @@ class Pad2dOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( paddings.size(), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Size of paddings should be equal to 4, but received %d.", static_cast(paddings.size()))); if (data_format == "NCHW") { diff --git a/paddle/fluid/operators/partial_concat_op.cc b/paddle/fluid/operators/partial_concat_op.cc index 7fca2eea27c45..518da44d1a08e 100644 --- 
a/paddle/fluid/operators/partial_concat_op.cc +++ b/paddle/fluid/operators/partial_concat_op.cc @@ -26,26 +26,26 @@ class PartialConcatOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( ctx->Inputs("X").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(X) of Partial ConcatOp should not be empty.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of Partial ConcatOp should not be null.")); auto inputs_dims = ctx->GetInputsDim("X"); PADDLE_ENFORCE_EQ(inputs_dims[0].size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only supports 2-D array with batch size in the 1st " "dimension and data in the 2nd.")); const size_t inputs_num = inputs_dims.size(); PADDLE_ENFORCE_GT(inputs_num, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: Input tensors count should > 0. But " "received inputs' length is 0.")); if (inputs_num == 1) { @@ -57,7 +57,7 @@ class PartialConcatOp : public framework::OperatorWithKernel { for (size_t i = 0; i < inputs_num; ++i) { PADDLE_ENFORCE_EQ(inputs_dims[i].size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "It only supports two dimensions input now.")); if (i == 0) { batch_size = inputs_dims[0][0]; @@ -65,11 +65,11 @@ class PartialConcatOp : public framework::OperatorWithKernel { } else { PADDLE_ENFORCE_EQ(inputs_dims[i][0], batch_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The batch size of all inputs must be same")); PADDLE_ENFORCE_EQ(inputs_dims[i][1], input_len, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input length of all inputs must be same")); } } @@ -101,10 +101,10 @@ class PartialConcatOp : public framework::OperatorWithKernel { break; } } - PADDLE_ENFORCE_EQ(flag, - 1, - platform::errors::InvalidArgument( - "All Inputs of PartialSum OP are Empty!")); + 
PADDLE_ENFORCE_EQ( + flag, + 1, + phi::errors::InvalidArgument("All Inputs of PartialSum OP are Empty!")); return phi::KernelKey(input_data_type, ctx.GetPlace()); } }; @@ -124,7 +124,7 @@ class PartialConcatGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( in_names.size(), out_names.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of arguments in %s[%d] and %s[%d] is not equal.", in_x, in_names.size(), diff --git a/paddle/fluid/operators/partial_concat_op.cu b/paddle/fluid/operators/partial_concat_op.cu index fb746b2944acc..a597cb11f08ff 100644 --- a/paddle/fluid/operators/partial_concat_op.cu +++ b/paddle/fluid/operators/partial_concat_op.cu @@ -14,7 +14,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/partial_concat_op.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace plat = paddle::platform; @@ -73,13 +73,13 @@ class PartialConcatOpCUDAKernel : public framework::OpKernel { phi::DenseTensor *out = ctx.Output("Out"); PADDLE_ENFORCE_EQ(in_vars[0] != nullptr, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input of partial concat should not be null.")); auto input_dim = in_vars[0]->dims(); PADDLE_ENFORCE_EQ(input_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only supports 2-D array with batch size in the 1st " "dimension and data in the 2nd.")); auto in_size = input_dim[1]; @@ -156,7 +156,7 @@ class PartialConcatGradOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(ins[0] != nullptr, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input of partial concat should not be null.")); // all parameters auto batch_size = ins[0]->dims()[0]; @@ -240,7 +240,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_concat, double, int, int64_t, - plat::float16, + 
phi::dtype::float16, phi::dtype::complex, phi::dtype::complex) {} PD_REGISTER_STRUCT_KERNEL(partial_concat_grad, @@ -251,6 +251,6 @@ PD_REGISTER_STRUCT_KERNEL(partial_concat_grad, double, int, int64_t, - plat::float16, + phi::dtype::float16, phi::dtype::complex, phi::dtype::complex) {} diff --git a/paddle/fluid/operators/partial_concat_op.h b/paddle/fluid/operators/partial_concat_op.h index fb0d17aa97b84..16dca9c8c8050 100644 --- a/paddle/fluid/operators/partial_concat_op.h +++ b/paddle/fluid/operators/partial_concat_op.h @@ -18,7 +18,7 @@ limitations under the License. */ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/concat_and_split.h" -#include "paddle/fluid/operators/utils.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/strided_memcpy.h" namespace paddle { @@ -28,7 +28,7 @@ static inline int64_t ComputeStartIndex(int64_t start_index, int64_t size) { PADDLE_ENFORCE_EQ( start_index >= -size && start_index < size, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The start_index is expected to be in range of [%d, %d), but got %d", -size, size, @@ -47,13 +47,13 @@ class PartialConcatKernel : public framework::OpKernel { phi::DenseTensor* out = ctx.Output("Out"); PADDLE_ENFORCE_EQ(ins[0] != nullptr, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input of partial concat should not be null.")); auto input_dim = ins[0]->dims(); PADDLE_ENFORCE_EQ(input_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only supports 2-D array with batch size in the 1st " "dimension and data in the 2nd.")); auto in_size = input_dim[1]; @@ -94,7 +94,7 @@ class PartialConcatGradientOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(ins[0] != nullptr, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input of partial concat should not be null.")); // 
all parameters auto batch_size = ins[0]->dims()[0]; diff --git a/paddle/fluid/operators/partial_sum_op.cc b/paddle/fluid/operators/partial_sum_op.cc index 0ac288069f11a..b0c97b4fcc914 100644 --- a/paddle/fluid/operators/partial_sum_op.cc +++ b/paddle/fluid/operators/partial_sum_op.cc @@ -25,12 +25,12 @@ class PartialSumOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE_GE(ctx->Inputs("X").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(X) of PartialSumOp should not be empty.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of PartialSumOp should not be null.")); auto inputs_dims = ctx->GetInputsDim("X"); @@ -38,7 +38,7 @@ class PartialSumOp : public framework::OperatorWithKernel { const size_t inputs_num = inputs_dims.size(); PADDLE_ENFORCE_GT(inputs_num, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: Input tensors count should > 0. 
But " "received inputs' length is 0.")); if (inputs_num == 1) { @@ -55,7 +55,7 @@ class PartialSumOp : public framework::OperatorWithKernel { for (size_t i = 0; i < inputs_num; ++i) { PADDLE_ENFORCE_EQ(inputs_dims[i].size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only support two dimensions input now.")); if (i == 0) { batch_size = inputs_dims[0][0]; @@ -63,23 +63,23 @@ class PartialSumOp : public framework::OperatorWithKernel { } else { PADDLE_ENFORCE_EQ(inputs_dims[i][0], batch_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The batch size of all inputs must be same")); PADDLE_ENFORCE_EQ(inputs_dims[i][1], input_len, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input len of all inputs must be same")); } } - PADDLE_ENFORCE_GT(input_len, - start_index, - platform::errors::OutOfRange( - "start_index must be less than input len")); + PADDLE_ENFORCE_GT( + input_len, + start_index, + phi::errors::OutOfRange("start_index must be less than input len")); if (length > 0) { PADDLE_ENFORCE_GE( input_len, start_index + length, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "start_index + length is larger than input length")); } @@ -104,10 +104,10 @@ class PartialSumOp : public framework::OperatorWithKernel { } } - PADDLE_ENFORCE_EQ(flag, - 1, - platform::errors::InvalidArgument( - "All Inputs of PartialSum OP are Empty!")); + PADDLE_ENFORCE_EQ( + flag, + 1, + phi::errors::InvalidArgument("All Inputs of PartialSum OP are Empty!")); return phi::KernelKey(input_data_type, platform::CPUPlace()); } }; @@ -127,7 +127,7 @@ class PartialSumGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( in_names.size(), out_names.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of arguments in %s[%d] and %s[%d] is not equal.", in_x, in_names.size(), diff --git a/paddle/fluid/operators/partial_sum_op.cu 
b/paddle/fluid/operators/partial_sum_op.cu index a38ec4c839469..25758cfde4870 100644 --- a/paddle/fluid/operators/partial_sum_op.cu +++ b/paddle/fluid/operators/partial_sum_op.cu @@ -14,7 +14,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/partial_sum_op.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace plat = paddle::platform; @@ -80,7 +80,7 @@ class PartialSumOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( in_vars[0] != nullptr, true, - platform::errors::InvalidArgument("The input should not be null.")); + phi::errors::InvalidArgument("The input should not be null.")); auto place = ctx.GetPlace(); // GPUPlace only now auto start_index = ctx.Attr("start_index"); @@ -156,7 +156,7 @@ class PartialSumGradOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( ins[0] != nullptr, true, - platform::errors::InvalidArgument("The input should not be null.")); + phi::errors::InvalidArgument("The input should not be null.")); auto start_index = ctx.Attr("start_index"); auto length = ctx.Attr("length"); if (length == -1) { diff --git a/paddle/fluid/operators/partial_sum_op.h b/paddle/fluid/operators/partial_sum_op.h index 1b88eafae77db..f0b55728efbc6 100644 --- a/paddle/fluid/operators/partial_sum_op.h +++ b/paddle/fluid/operators/partial_sum_op.h @@ -30,7 +30,7 @@ class PartialSumKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( ins[0] != nullptr, true, - platform::errors::InvalidArgument("The input should not be null.")); + phi::errors::InvalidArgument("The input should not be null.")); auto place = ctx.GetPlace(); // CPUPlace only now @@ -68,7 +68,7 @@ class PartialSumGradientOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( ins[0] != nullptr, true, - platform::errors::InvalidArgument("The input should not be null.")); + phi::errors::InvalidArgument("The input should not be 
null.")); auto start_index = ctx.Attr("start_index"); auto length = ctx.Attr("length"); auto batch_size = ins[0]->dims()[0]; diff --git a/paddle/fluid/operators/positive_negative_pair_op.cc b/paddle/fluid/operators/positive_negative_pair_op.cc index 96d8bbaa6f772..2974b38ffb5ba 100644 --- a/paddle/fluid/operators/positive_negative_pair_op.cc +++ b/paddle/fluid/operators/positive_negative_pair_op.cc @@ -49,7 +49,7 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { ctx->HasInput("AccumulateNegativePair") && ctx->HasInput("AccumulateNeutralPair"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "All optional inputs(AccumulatePositivePair, " "AccumulateNegativePair, AccumulateNeutralPair) of " "PositiveNegativePairOp are required if one of them " @@ -57,21 +57,21 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->GetInputDim("AccumulatePositivePair"), scalar_dim, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Shape of Input(AccumulatePositivePair) should be [1]. Received " "shape of Input(AccumulatePositivePair): [%s].", ctx->GetInputDim("AccumulatePositivePair"))); PADDLE_ENFORCE_EQ( ctx->GetInputDim("AccumulateNegativePair"), scalar_dim, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Shape of Input(AccumulateNegativePair) should be [1]. Received " "shape of Input(AccumulateNegativePair): [%s].", ctx->GetInputDim("AccumulateNegativePair"))); PADDLE_ENFORCE_EQ( ctx->GetInputDim("AccumulateNeutralPair"), scalar_dim, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Shape of Input(AccumulateNeutralPair) should be [1]. 
Received " "shape of Input(AccumulateNeutralPair): [%s].", ctx->GetInputDim("AccumulateNeutralPair"))); @@ -82,13 +82,13 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { auto query_dim = ctx->GetInputDim("QueryID"); PADDLE_ENFORCE_EQ(score_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Score should be a 2-D tensor. Received shape of " "Input(Score): [%s].", score_dim)); PADDLE_ENFORCE_EQ(label_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Label should be a 2-D tensor. Received shape of " "Input(Label): [%s].", label_dim)); @@ -98,7 +98,7 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( label_dim[0], score_dim[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Score) and Input(Label) should have the same " "height (batch size). Received: the shape of Input(Score) is " "[%s], while the shape of Input(Label) is [%s]. The first " @@ -109,7 +109,7 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( label_dim[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The width of Label should be 1, i.e. each item should " "have a scalar label. Received shape of Input(Label) is [%s]. " "The second dimension of it is %d, while the expected is %d.", @@ -120,7 +120,7 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( query_dim, label_dim, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(QueryID) should have the same shape as Input(Label). 
" "Received: the shape of Input(QueryID) is [%s], " "while the shape of Input(Label) is [%s].", @@ -131,7 +131,7 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->GetInputDim("Weight"), label_dim, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Weight) should have the same shape as Input(Label). " "Received: the shape of Input(Weight) is [%s] while the shape " "of Input(Label) is [%s].", @@ -144,7 +144,7 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LT( column, depth, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "Attr(column) should be less than depth(the second " "dimension of Input(Score)). Received Attr(column): %d, while " "depth is %d.", @@ -153,7 +153,7 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( column, -depth, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "Attr(column) should be greater than equal to negative " "depth, i.e. the second dimension of Input(Score). " "Received Attr(column): %d, while negative depth is %d.", diff --git a/paddle/fluid/operators/prim_ops/bernoulli_p_op.cc b/paddle/fluid/operators/prim_ops/bernoulli_p_op.cc deleted file mode 100644 index 251cd9bff5400..0000000000000 --- a/paddle/fluid/operators/prim_ops/bernoulli_p_op.cc +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class BernoulliPrimOp : public framework::OperatorBase { - public: - BernoulliPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator bernoulli_p should not be executed directly")); - } -}; - -class BernoulliPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddOutput("Y", "(Tensor), The output tensor of bernoulli_p op."); - AddAttr>( - "shape", "(std::vector) The shape of output tensor."); - AddAttr("dtype", "(int) The dtype of output tensor."); - AddAttr("p", "(float) The probability of bernoulli distribution."); - AddComment(R"DOC( -Autograd primitive bernoulli_p operator. 
-)DOC"); - } -}; - -class BernoulliPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - auto shape = ctx->Attrs().Get>("shape"); - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(shape); - } -}; - -class BernoulliPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto y_name = Output(ctx, "Y")[0]; - auto data_type = static_cast( - PADDLE_GET_CONST(int, ctx->GetAttr("dtype"))); - SetDataType(ctx, y_name, data_type); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(bernoulli_p, - paddle::operators::BernoulliPrimOp, - paddle::operators::BernoulliPrimOpMaker, - paddle::operators::BernoulliPrimOpShapeInference, - paddle::operators::BernoulliPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/print_op.cc b/paddle/fluid/operators/print_op.cc index 26647e8f05c83..e521fc0ffcacf 100644 --- a/paddle/fluid/operators/print_op.cc +++ b/paddle/fluid/operators/print_op.cc @@ -58,12 +58,11 @@ class PrintOp : public framework::OperatorBase { PADDLE_ENFORCE_NOT_NULL( in_var, - platform::errors::NotFound("The input:%s not found in scope", - Input("In"))); + phi::errors::NotFound("The input:%s not found in scope", Input("In"))); PADDLE_ENFORCE_NOT_NULL( out_var, - platform::errors::NotFound("The output:%s not found in scope", - Output("Out"))); + phi::errors::NotFound("The output:%s not found in scope", + Output("Out"))); auto &in_tensor = in_var->Get(); phi::DenseTensor *out_tensor = out_var->GetMutable(); diff --git a/paddle/fluid/operators/prune_gate_by_capacity_op.cc b/paddle/fluid/operators/prune_gate_by_capacity_op.cc index 365342fa7ea5f..4e4bc4d291d68 100644 --- a/paddle/fluid/operators/prune_gate_by_capacity_op.cc +++ 
b/paddle/fluid/operators/prune_gate_by_capacity_op.cc @@ -51,7 +51,7 @@ class PruneGateByCapacityOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( expert_count_num_ele, n_expert * n_worker, - platform::errors::Unavailable( + phi::errors::Unavailable( "The number of elements for expert_count is ( %ld ) incorrect. " "Because the number of expert_count must equal the " "product of n_worker ( %ld ) and n_expert ( %ld ). " @@ -76,11 +76,11 @@ class PruneGateByCapacityOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( gate_idx_data_type, expert_count_data_type, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dtype of the gate_idx and expert_count should be same")); PADDLE_ENFORCE_EQ(gate_idx_data_type, framework::proto::VarType::INT64, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dtype of the gate_idx and expert_count should " "be same as int64")); return phi::KernelKey(gate_idx_data_type, ctx.GetPlace()); diff --git a/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc b/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc index 9d5e3eb00d0ef..ff8f9931b0e06 100644 --- a/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc +++ b/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc @@ -29,15 +29,15 @@ class DistributedLookupTableOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInputs("Ids"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Ids) of LookupTableOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasInput("W"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(W) of LookupTableOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasOutputs("Outputs"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Outs) of LookupTableOp should not be null.")); auto ids_dims = 
ctx->GetInputsDim("Ids"); @@ -46,13 +46,13 @@ class DistributedLookupTableOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( table_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only 2 dimensions of the 'Embedding' is supported.")); for (auto &ids_dim : ids_dims) { PADDLE_ENFORCE_EQ(ids_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension of the 'Ids' tensor must be 2.")); } diff --git a/paddle/fluid/operators/pscore/distributed_lookup_table_op.h b/paddle/fluid/operators/pscore/distributed_lookup_table_op.h index 414500c2faac3..258de211c482b 100644 --- a/paddle/fluid/operators/pscore/distributed_lookup_table_op.h +++ b/paddle/fluid/operators/pscore/distributed_lookup_table_op.h @@ -39,7 +39,7 @@ class DistributedLookupTableKernel : public framework::OpKernel { } else if (var->IsType()) { emb_dim = var->Get().value().dims()[1]; } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Expected type of `W` must be Tensor, SelectedRows.But got " "unsupport type: %s.", framework::ToTypeName(var->Type()))); diff --git a/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc b/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc index 2a8b4f9be7698..cc9e0aeff1f01 100644 --- a/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc +++ b/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc @@ -29,11 +29,11 @@ class DistributedPushSparseOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInputs("Ids"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Ids) of PushSparseOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasOutputs("Outputs"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Outs) of PushSparseOp should not be null.")); auto ids_dims = 
ctx->GetInputsDim("Ids"); @@ -41,7 +41,7 @@ class DistributedPushSparseOp : public framework::OperatorWithKernel { for (auto &ids_dim : ids_dims) { PADDLE_ENFORCE_EQ(ids_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension of the 'Ids' tensor must be 2.")); } diff --git a/paddle/fluid/operators/pscore/fake_init_op.cc b/paddle/fluid/operators/pscore/fake_init_op.cc index cd919cb7ca0bf..1104b8bed673e 100644 --- a/paddle/fluid/operators/pscore/fake_init_op.cc +++ b/paddle/fluid/operators/pscore/fake_init_op.cc @@ -43,7 +43,7 @@ class FakeInitOp : public framework::OperatorBase { tensor = out_var.GetMutable()->mutable_value(); tensor->Resize(common::make_ddim(Attr>("shape"))); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "fake init op's output only" "supports SelectedRows and phi::DenseTensor")); } diff --git a/paddle/fluid/operators/pscore/heter_listen_and_serv_op.cc b/paddle/fluid/operators/pscore/heter_listen_and_serv_op.cc index 978981a6fcdf3..be1e6c64b2484 100644 --- a/paddle/fluid/operators/pscore/heter_listen_and_serv_op.cc +++ b/paddle/fluid/operators/pscore/heter_listen_and_serv_op.cc @@ -66,13 +66,13 @@ void HeterListenAndServOp::RunAsyncLoop(framework::ProgramDesc *program) const { VLOG(3) << "after split, key = " << pieces[0] << ", id=" << pieces[1]; PADDLE_ENFORCE_EQ(pieces.size(), 2, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Invalid format of message_and_id argument. " "Expected \"message:block_id\". 
Received %s", grad_and_id.c_str())); PADDLE_ENFORCE_EQ(out_map->count(pieces[0]), 0, - platform::errors::AlreadyExists( + phi::errors::AlreadyExists( "The message name %s has already existed in out_map", pieces[0].c_str())); @@ -87,7 +87,7 @@ void HeterListenAndServOp::RunAsyncLoop(framework::ProgramDesc *program) const { size_t num_blocks = program->Size(); PADDLE_ENFORCE_GE(num_blocks, 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Invalid number of blocks in server program. Expected " "equal or greater than 1. Received %zu", num_blocks)); @@ -136,7 +136,7 @@ void HeterListenAndServOp::RunImpl(const framework::Scope &scope, PADDLE_ENFORCE_EQ(heter_server_, nullptr, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "RPC service has been created unexpectedly.")); std::string endpoint = Attr("endpoint"); @@ -150,7 +150,7 @@ void HeterListenAndServOp::RunImpl(const framework::Scope &scope, Attr>("optimize_blocks"); PADDLE_ENFORCE_GE(optimize_blocks.size(), 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "optimize blocks is less than 1. 
Optimize blocks " "should be 1 at least on the pserver side.")); diff --git a/paddle/fluid/operators/pull_box_extended_sparse_op.cc b/paddle/fluid/operators/pull_box_extended_sparse_op.cc index 75918b9ad62a4..a6f2349648ab8 100644 --- a/paddle/fluid/operators/pull_box_extended_sparse_op.cc +++ b/paddle/fluid/operators/pull_box_extended_sparse_op.cc @@ -24,16 +24,16 @@ class PullBoxExtendedSparseOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( ctx->Inputs("Ids").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(Ids) of PullBoxExtendedSparseOp should not be empty.")); PADDLE_ENFORCE_GE( ctx->Outputs("Out").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Outputs(Out) of PullBoxExtendedSparseOp should not be empty.")); PADDLE_ENFORCE_GE(ctx->Outputs("OutExtend").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Outputs(OutExtend) of PullBoxExtendedSparseOp " "should not be empty.")); auto emb_size = static_cast(ctx->Attrs().Get("emb_size")); @@ -50,7 +50,7 @@ class PullBoxExtendedSparseOp : public framework::OperatorWithKernel { int ids_rank = ids_dims.size(); PADDLE_ENFORCE_EQ(ids_dims[ids_rank - 1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Shape error in %lu id, the last dimension of the " "'Ids' tensor must be 1.", i)); diff --git a/paddle/fluid/operators/pull_box_extended_sparse_op.h b/paddle/fluid/operators/pull_box_extended_sparse_op.h index 76e570f10fb64..8e5cabb3670c5 100644 --- a/paddle/fluid/operators/pull_box_extended_sparse_op.h +++ b/paddle/fluid/operators/pull_box_extended_sparse_op.h @@ -84,7 +84,7 @@ static void PushBoxExtendedSparseFunctor( } else { PADDLE_ENFORCE_EQ(batch_size, cur_batch_size, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The batch size of all input slots should be same," "please check")); } diff --git a/paddle/fluid/operators/pull_box_sparse_op.cc 
b/paddle/fluid/operators/pull_box_sparse_op.cc index d37cc35a59945..51786ffc0180d 100644 --- a/paddle/fluid/operators/pull_box_sparse_op.cc +++ b/paddle/fluid/operators/pull_box_sparse_op.cc @@ -24,12 +24,12 @@ class PullBoxSparseOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( ctx->Inputs("Ids").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(Ids) of PullBoxSparseOp should not be empty.")); PADDLE_ENFORCE_GE( ctx->Outputs("Out").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Outputs(Out) of PullBoxSparseOp should not be empty.")); auto hidden_size = static_cast(ctx->Attrs().Get("size")); auto all_ids_dim = ctx->GetInputsDim("Ids"); @@ -41,7 +41,7 @@ class PullBoxSparseOp : public framework::OperatorWithKernel { int ids_rank = ids_dims.size(); PADDLE_ENFORCE_EQ(ids_dims[ids_rank - 1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Shape error in %lu id, the last dimension of the " "'Ids' tensor must be 1.", i)); diff --git a/paddle/fluid/operators/pull_box_sparse_op.h b/paddle/fluid/operators/pull_box_sparse_op.h index 1ebfa11a2b2e6..06ebe7b5a93d3 100644 --- a/paddle/fluid/operators/pull_box_sparse_op.h +++ b/paddle/fluid/operators/pull_box_sparse_op.h @@ -82,7 +82,7 @@ static void PushBoxSparseFunctor(const framework::ExecutionContext &ctx) { } else { PADDLE_ENFORCE_EQ(batch_size, cur_batch_size, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The batch size of all input slots should be same, " "please cheack")); } diff --git a/paddle/fluid/operators/pull_gpups_sparse_op.cc b/paddle/fluid/operators/pull_gpups_sparse_op.cc index 6055632f5681a..946a1b8c7136b 100644 --- a/paddle/fluid/operators/pull_gpups_sparse_op.cc +++ b/paddle/fluid/operators/pull_gpups_sparse_op.cc @@ -24,21 +24,21 @@ class PullGpuPSSparseOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( ctx->Inputs("Ids").size(), 1UL, - 
platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(Ids) of PullGpuPSSparseOp should not be empty.")); PADDLE_ENFORCE_GE( ctx->Outputs("Out").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Outputs(Out) of PullGpuPSSparseOp should not be empty.")); auto embedding_size_vec = ctx->Attrs().Get>("size"); PADDLE_ENFORCE_EQ( ctx->Inputs("Ids").size(), embedding_size_vec.size(), - platform::errors::InvalidArgument("The ids size: %lu must be equal to " - "the length of embedding size: %lu.", - ctx->Inputs("Ids").size(), - embedding_size_vec.size())); + phi::errors::InvalidArgument("The ids size: %lu must be equal to " + "the length of embedding size: %lu.", + ctx->Inputs("Ids").size(), + embedding_size_vec.size())); auto all_ids_dim = ctx->GetInputsDim("Ids"); const size_t n_ids = all_ids_dim.size(); std::vector outs_dims; @@ -49,7 +49,7 @@ class PullGpuPSSparseOp : public framework::OperatorWithKernel { int ids_rank = ids_dims.size(); PADDLE_ENFORCE_EQ(ids_dims[ids_rank - 1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Shape error in %lu id, the last dimension of the " "'Ids' tensor must be 1.", i)); diff --git a/paddle/fluid/operators/pull_gpups_sparse_op.h b/paddle/fluid/operators/pull_gpups_sparse_op.h index e5e08cfdde685..098e9b143a8e1 100644 --- a/paddle/fluid/operators/pull_gpups_sparse_op.h +++ b/paddle/fluid/operators/pull_gpups_sparse_op.h @@ -78,7 +78,7 @@ static void PushGpuPSSparseFunctor(const framework::ExecutionContext &ctx) { } else { PADDLE_ENFORCE_EQ(batch_size, cur_batch_size, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The batch size of all input slots should be same, " "please check")); } diff --git a/paddle/fluid/operators/pull_sparse_op.cc b/paddle/fluid/operators/pull_sparse_op.cc index 55a6af8466b86..dcea341c8a9e9 100644 --- a/paddle/fluid/operators/pull_sparse_op.cc +++ b/paddle/fluid/operators/pull_sparse_op.cc @@ -25,11 +25,11 
@@ class PullSparseOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_GE(ctx->Inputs("Ids").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Ids) of PullSparseOp can not be null")); PADDLE_ENFORCE_GE(ctx->Outputs("Out").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of PullSparseOp can not be null")); auto hidden_size = @@ -43,7 +43,7 @@ class PullSparseOp : public framework::OperatorWithKernel { int ids_rank = ids_dims.size(); PADDLE_ENFORCE_EQ(ids_dims[ids_rank - 1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Shape error in %lu id, the last dimension of " " the 'Ids' tensor must be 1.", i)); diff --git a/paddle/fluid/operators/pull_sparse_v2_op.cc b/paddle/fluid/operators/pull_sparse_v2_op.cc index d134607d3c4bb..07af5da7ef92a 100644 --- a/paddle/fluid/operators/pull_sparse_v2_op.cc +++ b/paddle/fluid/operators/pull_sparse_v2_op.cc @@ -25,11 +25,11 @@ class PullSparseV2Op : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_GE(ctx->Inputs("Ids").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Ids) of PullSparseV2Op can not be null")); PADDLE_ENFORCE_GE(ctx->Outputs("Out").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of PullSparseV2Op can not be null")); auto hidden_size = diff --git a/paddle/fluid/operators/push_dense_op.cc b/paddle/fluid/operators/push_dense_op.cc index c0b9b04500648..080610c6e0df1 100644 --- a/paddle/fluid/operators/push_dense_op.cc +++ b/paddle/fluid/operators/push_dense_op.cc @@ -25,7 +25,7 @@ class PushDenseOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_GE(ctx->Inputs("Ids").size(), 1UL, - 
platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Ids) of PushDenseOp can not be null.")); } diff --git a/paddle/fluid/operators/push_dense_op.h b/paddle/fluid/operators/push_dense_op.h index ec7b6b6c3f0bf..6ec833df39583 100644 --- a/paddle/fluid/operators/push_dense_op.h +++ b/paddle/fluid/operators/push_dense_op.h @@ -33,7 +33,7 @@ void PushDenseFunctor(const framework::ExecutionContext& ctx) { auto table_id = static_cast(ctx.Attr("TableId")); PADDLE_ENFORCE_GT(table_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "table id should > 0, but value is ", table_id)); float scale_datanorm = ctx.Attr("ScaleDataNorm"); const auto& ids = ctx.MultiInput("Ids"); @@ -41,7 +41,7 @@ void PushDenseFunctor(const framework::ExecutionContext& ctx) { ids[0]->lod().size() ? ids[0]->lod()[0].size() - 1 : ids[0]->dims()[0]; PADDLE_ENFORCE_GT(batch_size, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "batch size should > 0, but value is ", batch_size)); auto fleet_ptr = framework::FleetWrapper::GetInstance(); @@ -51,10 +51,10 @@ void PushDenseFunctor(const framework::ExecutionContext& ctx) { // note: GetInstance() is not thread-safe // we assume PullDenseWorker has been already initialized in DistMultiTrainer auto pull_dense_worker = framework::PullDenseWorker::GetInstance(); - PADDLE_ENFORCE_NE(pull_dense_worker, - nullptr, - platform::errors::PreconditionNotMet( - "pull_dense_worker should not be null")); + PADDLE_ENFORCE_NE( + pull_dense_worker, + nullptr, + phi::errors::PreconditionNotMet("pull_dense_worker should not be null")); int thread_id = pull_dense_worker->GetThreadIdByScope(&ctx.scope()); pull_dense_worker->IncreaseThreadVersion(thread_id, table_id); #endif diff --git a/paddle/fluid/operators/py_func_op.cc b/paddle/fluid/operators/py_func_op.cc index 7d9c8ceca4943..5e3fa0b5507a0 100644 --- a/paddle/fluid/operators/py_func_op.cc +++ b/paddle/fluid/operators/py_func_op.cc @@ -47,7 +47,7 @@ static 
py::object *GetPythonCallableObject(size_t i) { PADDLE_ENFORCE_LT( i, g_py_callables.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Invalid python callable id %d, which should be less than %d.", i, g_py_callables.size())); @@ -81,7 +81,7 @@ static void CallPythonFunc(py::object *callable, // Otherwise, ret_num must be equal to out_num PADDLE_ENFORCE_EQ(ret_num == 1, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Python function has no return values or returns " "None. In this case, ret_num = 1 && ret[0] == None " "&& out_num should be 0. But ret_num is %d", @@ -90,7 +90,7 @@ static void CallPythonFunc(py::object *callable, PADDLE_ENFORCE_EQ( out_num == 0, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Python function has no return values or returns None. In " "this case, ret_num = 1 && ret[0] == None && out_num should " "be 0. But out_num is %d", @@ -99,7 +99,7 @@ static void CallPythonFunc(py::object *callable, PADDLE_ENFORCE_EQ( py::cast(ret_tuple[0]) == nullptr, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Python function has no return values or returns None. In " "this case, ret_num = 1 && ret[0] == None && out_num should " "be 0. But ret[0] is not None")); @@ -113,12 +113,12 @@ static void CallPythonFunc(py::object *callable, try { auto *py_out_tensor = py::cast(ret_tuple[i]); PADDLE_ENFORCE_NOT_NULL(py_out_tensor, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output tensor %d should not be nullptr", i)); out->set_lod(py_out_tensor->lod()); out->ShareDataWith(*py_out_tensor); } catch (py::cast_error &) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "py::cast to phi::DenseTensor error. 
The %d-th output exception is " "phi::DenseTensor", i)); @@ -139,15 +139,15 @@ class PyFuncOpVarTypeInference : public framework::StaticGraphVarTypeInference { PADDLE_ENFORCE_EQ( has_in || has_out, true, - platform::errors::InvalidArgument("Input(X) or Output(Out) must exist, " - "but has_in is %d, has_out is %d.", - has_in, - has_out)); + phi::errors::InvalidArgument("Input(X) or Output(Out) must exist, " + "but has_in is %d, has_out is %d.", + has_in, + has_out)); PADDLE_ENFORCE_GE( PADDLE_GET_CONST(int, ctx->GetAttr(kForwardPythonCallableId.data())), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Function id cannot be less than 0, but received value is %d.", PADDLE_GET_CONST(int, ctx->GetAttr(kForwardPythonCallableId.data())))); @@ -192,8 +192,8 @@ class PyFuncOpShapeInference : public framework::InferShapeBase { PADDLE_ENFORCE_EQ( !ctx->IsRuntime(), true, - platform::errors::InvalidArgument("Shape inference cannot be called at " - "run time in 'py_func' operator.")); + phi::errors::InvalidArgument("Shape inference cannot be called at " + "run time in 'py_func' operator.")); } }; diff --git a/paddle/fluid/operators/pyramid_hash_op.cc b/paddle/fluid/operators/pyramid_hash_op.cc index f5a8fcaa9de0c..3f92da5d73676 100644 --- a/paddle/fluid/operators/pyramid_hash_op.cc +++ b/paddle/fluid/operators/pyramid_hash_op.cc @@ -88,24 +88,24 @@ class PyramidHashOP : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::NotFound("Input(X) of PyramidHashOP is not found.")); + phi::errors::NotFound("Input(X) of PyramidHashOP is not found.")); PADDLE_ENFORCE_EQ( ctx->HasInput("W"), true, - platform::errors::NotFound("Input(W) of PyramidHashOP is not found.")); - PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), - true, - platform::errors::NotFound( - "Output(Out) of PyramidHashOP is not found.")); + phi::errors::NotFound("Input(W) of PyramidHashOP is not found.")); + PADDLE_ENFORCE_EQ( + ctx->HasOutput("Out"), + 
true, + phi::errors::NotFound("Output(Out) of PyramidHashOP is not found.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("DropPos"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(DropPos) of PyramidHashOP is not found.")); auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ(x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(X) of PyramidHashOP is invalid. " "It should be 2, but got %d", x_dims.size())); @@ -113,7 +113,7 @@ class PyramidHashOP : public framework::OperatorWithKernel { auto w_dims = ctx->GetInputDim("W"); PADDLE_ENFORCE_EQ(w_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(W) of PyramidHashOP is invalid. " "It should be 2, but got %d", w_dims.size())); @@ -124,7 +124,7 @@ class PyramidHashOP : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( w_dims[0], space_len + rand_len, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(W) of PyramidHashOP is invalid. " "It should be space_len + rand_len, but now %d != %d + %d", w_dims[0], @@ -133,7 +133,7 @@ class PyramidHashOP : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( w_dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(W) of PyramidHashOP is invalid." 
" It should be 1, but got %d", w_dims[1])); @@ -142,7 +142,7 @@ class PyramidHashOP : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( num_emb % rand_len, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The PyramidHashOP's Attr(num_emb) should mod Attr(rand_len), " "but num_emb is %d, rand_len is %d", num_emb, @@ -153,19 +153,19 @@ class PyramidHashOP : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("WhiteList"), true, - platform::errors::NotFound("Input(WhiteList) of PyramidHashOP is not " - "found but white_list_len > 0.")); + phi::errors::NotFound("Input(WhiteList) of PyramidHashOP is not " + "found but white_list_len > 0.")); auto wl_dims = ctx->GetInputDim("WhiteList"); PADDLE_ENFORCE_EQ( wl_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(WhiteList) of PyramidHashOP is invalid." " It should be 2, but got %d", wl_dims.size())); PADDLE_ENFORCE_EQ(wl_dims[0], white_list_len, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(WhiteList) of " "PyramidHashOP is invalid." " It should be equal to Attr(white_list_len) " @@ -174,7 +174,7 @@ class PyramidHashOP : public framework::OperatorWithKernel { white_list_len)); PADDLE_ENFORCE_EQ(wl_dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(WhiteList) of " "PyramidHashOP is invalid." 
" It should be 1, but got %d", @@ -186,19 +186,19 @@ class PyramidHashOP : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("BlackList"), true, - platform::errors::NotFound("Input(BlackList) of PyramidHashOP is not " - "found but black_list_len > 0.")); + phi::errors::NotFound("Input(BlackList) of PyramidHashOP is not " + "found but black_list_len > 0.")); auto bl_dims = ctx->GetInputDim("BlackList"); PADDLE_ENFORCE_EQ( bl_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(BlackList) of PyramidHashOP is invalid." " It should be 2, but got %d", bl_dims.size())); PADDLE_ENFORCE_EQ(bl_dims[0], black_list_len, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(BlackList) of " "PyramidHashOP is invalid." " It should be equal to Attr(black_list_len)" @@ -207,7 +207,7 @@ class PyramidHashOP : public framework::OperatorWithKernel { black_list_len)); PADDLE_ENFORCE_EQ(bl_dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(BlackList) of " "PyramidHashOP is invalid." 
" It should be 1, but got %d", @@ -315,7 +315,7 @@ class CPUPyramidHashOPKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( math::bloomfilter_check(_filter), 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The white filter is not loaded successfully, please make sure " "'white_list_len': %d is valid for Input(WhiteList).", white_list_len)); @@ -325,7 +325,7 @@ class CPUPyramidHashOPKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( math::bloomfilter_check(_black_filter), 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The black filter is not loaded successfully, please make sure " "'black_list_len': %d is valid for Input(BlackList).", black_list_len)); @@ -442,27 +442,27 @@ class PyramidHashOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - platform::errors::NotFound( - "Input(X) of PyramidHashOpGrad is not found.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("W"), - true, - platform::errors::NotFound( - "Input(W) of PyramidHashOpGrad is not found.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("X"), + true, + phi::errors::NotFound("Input(X) of PyramidHashOpGrad is not found.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("W"), + true, + phi::errors::NotFound("Input(W) of PyramidHashOpGrad is not found.")); PADDLE_ENFORCE_EQ(ctx->HasInput("DropPos"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(DropPos) of PyramidHashOpGrad is not found.")); PADDLE_ENFORCE_EQ( ctx->HasInput("X_Temp_Out"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(X_Temp_Out) of PyramidHashOpGrad is not found.")); PADDLE_ENFORCE_EQ( ctx->HasInput(framework::GradVarName("Out")), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(Out@Grad) of PyramidHashOpGrad is not found.")); } diff --git 
a/paddle/fluid/operators/quantize_linear_op.cc b/paddle/fluid/operators/quantize_linear_op.cc index c0ef288b5134b..44ff53e8a7d7b 100644 --- a/paddle/fluid/operators/quantize_linear_op.cc +++ b/paddle/fluid/operators/quantize_linear_op.cc @@ -164,17 +164,16 @@ class QuantizeLinearOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( quant_axis == 0 || quant_axis == 1 || quant_axis == -1, true, - platform::errors::InvalidArgument( - "'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + phi::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); }); AddAttr("bit_length", "(int, default 8)") .SetDefault(8) .AddCustomChecker([](const int &bit_length) { PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'bit_length' should be between 1 and 16, but " "the received is %d", bit_length)); @@ -190,7 +189,7 @@ class QuantizeLinearOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( round_type == 0 || round_type == 1, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'round_type' should be 0 or 1, 0 rounding to " "nearest ties to even and 1 is rounding to nearest " "ties away from zero.but the received is %d", diff --git a/paddle/fluid/operators/quantize_linear_op.cu b/paddle/fluid/operators/quantize_linear_op.cu index 8bcbc1107e9d1..d9aa1a860f405 100644 --- a/paddle/fluid/operators/quantize_linear_op.cu +++ b/paddle/fluid/operators/quantize_linear_op.cu @@ -19,7 +19,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/quantize_linear_op.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" -using float16 = paddle::platform::float16; +using float16 = phi::dtype::float16; namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/queue_generator_op.cc b/paddle/fluid/operators/queue_generator_op.cc index 8eee44d6827ea..ca4f943885b2f 100644 --- a/paddle/fluid/operators/queue_generator_op.cc +++ b/paddle/fluid/operators/queue_generator_op.cc @@ -46,13 +46,13 @@ class QueueGeneratorOp : public framework::OperatorBase { PADDLE_ENFORCE_GT( names.size(), 0, - platform::errors::InvalidArgument("The attribute 'names' for " - "Op(queue_generator) must be set.")); + phi::errors::InvalidArgument("The attribute 'names' for " + "Op(queue_generator) must be set.")); int capacity = Attr("capacity"); PADDLE_ENFORCE_GT(capacity, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The attribute 'capacity' for Op(queue_generator) " "must be set a positive value, " "but the one received is %d.", @@ -71,8 +71,8 @@ class QueueGeneratorOp : public framework::OperatorBase { auto var = scope->FindVar(name); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound( - "Can't find var named '%s' in the global scope.", name)); + phi::errors::NotFound("Can't find var named '%s' in the global scope.", + name)); auto ptr = var->GetMutable(); ptr->InitOnce(capacity); diff --git a/paddle/fluid/operators/random_routing_op.cc b/paddle/fluid/operators/random_routing_op.cc index dffcc9c361a66..e579b3f6146e2 100644 --- a/paddle/fluid/operators/random_routing_op.cc +++ b/paddle/fluid/operators/random_routing_op.cc @@ -37,17 +37,17 @@ class RandomRoutingOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(prob_dims[0], topk_val_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of ScatterNdAddOp should not be null.")); PADDLE_ENFORCE_EQ(topk_idx_dims[1], topk_val_dims[1], - 
platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of ScatterNdAddOp should not be null.")); PADDLE_ENFORCE_EQ(topk_idx_dims[0], topk_val_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of ScatterNdAddOp should not be null.")); ctx->SetOutputDim("Out", topk_idx_dims); @@ -62,7 +62,7 @@ class RandomRoutingOp : public framework::OperatorWithKernel { OperatorWithKernel::IndicateVarDataType(ctx, "TopK_Idx"); PADDLE_ENFORCE_EQ(topk_idx_dtype, framework::proto::VarType::INT64, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dtype of the topk_idx_dtype should be int64")); const auto& topk_value_type = diff --git a/paddle/fluid/operators/range_op.h b/paddle/fluid/operators/range_op.h index 195ef276b957e..8350794081bbe 100644 --- a/paddle/fluid/operators/range_op.h +++ b/paddle/fluid/operators/range_op.h @@ -23,23 +23,23 @@ namespace operators { template void GetSize(T start, T end, T step, int64_t* size) { - PADDLE_ENFORCE_NE(step, - 0, - platform::errors::InvalidArgument( - "The step of range op should not be 0.")); + PADDLE_ENFORCE_NE( + step, + 0, + phi::errors::InvalidArgument("The step of range op should not be 0.")); if (start < end) { PADDLE_ENFORCE_GT( step, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The step should be greater than 0 while start < end.")); } if (start > end) { PADDLE_ENFORCE_LT(step, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The step should be less than 0 while start > end.")); } diff --git a/paddle/fluid/operators/rank_attention_op.cc b/paddle/fluid/operators/rank_attention_op.cc index 95de6f4e08054..aaef2782f5e21 100644 --- a/paddle/fluid/operators/rank_attention_op.cc +++ b/paddle/fluid/operators/rank_attention_op.cc @@ -27,32 +27,32 @@ class RankAttentionOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { 
PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of RankAttentionOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("RankOffset"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(RankOffset) of RankAttentionOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("RankParam"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(RankParam) of RankAttentionOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("InsRank"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(InsRank) of RankAttentionOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("InputHelp"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(InputHelp) of RankAttentionOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of RankAttentionOp should not be null.")); auto max_rank = ctx->Attrs().Get("MaxRank"); @@ -64,13 +64,13 @@ class RankAttentionOp : public framework::OperatorWithKernel { auto x_fea_dim = x_dims[1]; auto block_matrix_row = max_rank * x_fea_dim; - PADDLE_ENFORCE_EQ((rank_offset_dims[1] - 1) / 2, - max_rank, - platform::errors::InvalidArgument( - "Input(RankOffset) has wrong columns, " - "except columns to be %d, but got %d", - max_rank, - (rank_offset_dims[1] - 1) / 2)); + PADDLE_ENFORCE_EQ( + (rank_offset_dims[1] - 1) / 2, + max_rank, + phi::errors::InvalidArgument("Input(RankOffset) has wrong columns, " + "except columns to be %d, but got %d", + max_rank, + (rank_offset_dims[1] - 1) / 2)); ctx->SetOutputDim("Out", {ins_num, para_col}); ctx->SetOutputDim("InputHelp", {ins_num, block_matrix_row}); @@ -94,23 +94,23 @@ class RankAttentionGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - 
platform::errors::InvalidArgument("Input(X) should not be null")); - PADDLE_ENFORCE_EQ(ctx->HasInput("RankParam"), - true, - platform::errors::InvalidArgument( - "Input(RankParam) should not be null")); - PADDLE_ENFORCE_EQ(ctx->HasInput("RankOffset"), - true, - platform::errors::InvalidArgument( - "Input(RankOffset) should not be null")); - PADDLE_ENFORCE_EQ(ctx->HasInput("InputHelp"), - true, - platform::errors::InvalidArgument( - "Input(InputHelp) should not be null")); + phi::errors::InvalidArgument("Input(X) should not be null")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("RankParam"), + true, + phi::errors::InvalidArgument("Input(RankParam) should not be null")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("RankOffset"), + true, + phi::errors::InvalidArgument("Input(RankOffset) should not be null")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("InputHelp"), + true, + phi::errors::InvalidArgument("Input(InputHelp) should not be null")); PADDLE_ENFORCE_EQ( ctx->HasInput("InsRank"), true, - platform::errors::InvalidArgument("Input(InsRank) should not be null")); + phi::errors::InvalidArgument("Input(InsRank) should not be null")); ctx->SetOutputDim(framework::GradVarName("RankParam"), ctx->GetInputDim("RankParam")); diff --git a/paddle/fluid/operators/rank_attention_op.cu b/paddle/fluid/operators/rank_attention_op.cu index 6d6c4c6a6d1dc..d73de790a527e 100644 --- a/paddle/fluid/operators/rank_attention_op.cu +++ b/paddle/fluid/operators/rank_attention_op.cu @@ -48,15 +48,15 @@ class RankAttentionCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( rank_offset_dims[0], ins_num, - platform::errors::InvalidArgument("Input(RankOffset) has wrong rows.")); - PADDLE_ENFORCE_EQ((rank_offset_dims[1] - 1) / 2, - max_rank, - platform::errors::InvalidArgument( - "Input(RankOffset) has wrong columns.")); + phi::errors::InvalidArgument("Input(RankOffset) has wrong rows.")); + PADDLE_ENFORCE_EQ( + (rank_offset_dims[1] - 1) / 2, + max_rank, + phi::errors::InvalidArgument("Input(RankOffset) 
has wrong columns.")); PADDLE_ENFORCE_EQ( max_rank * max_rank * x_fea_dim, para_row, - platform::errors::InvalidArgument("Input(RankParam) has wrong rows.")); + phi::errors::InvalidArgument("Input(RankParam) has wrong rows.")); int block_matrix_row = max_rank * x_fea_dim; diff --git a/paddle/fluid/operators/rank_attention_op.h b/paddle/fluid/operators/rank_attention_op.h index 5124e91653810..f119c4a2f315c 100644 --- a/paddle/fluid/operators/rank_attention_op.h +++ b/paddle/fluid/operators/rank_attention_op.h @@ -23,10 +23,10 @@ template class RankAttentionKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), - true, - platform::errors::Unimplemented( - "Rank Attention only supports GPU now.")); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), + true, + phi::errors::Unimplemented("Rank Attention only supports GPU now.")); } }; } // namespace operators diff --git a/paddle/fluid/operators/rank_loss_op.cc b/paddle/fluid/operators/rank_loss_op.cc index ebdddfd41b33f..42f2ac2959502 100644 --- a/paddle/fluid/operators/rank_loss_op.cc +++ b/paddle/fluid/operators/rank_loss_op.cc @@ -49,22 +49,22 @@ class RankLossOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( label_dims.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension size of Input(Label) must be greater than " "or equal to 1, but received %d.", label_dims.size())); PADDLE_ENFORCE_LE( label_dims.size(), 2, - platform::errors::InvalidArgument("The dimension size of Input(Label) " - "must be less than or equal to 2, " - "but received %d.", - label_dims.size())); + phi::errors::InvalidArgument("The dimension size of Input(Label) " + "must be less than or equal to 2, " + "but received %d.", + label_dims.size())); if (label_dims.size() == 2U) { PADDLE_ENFORCE_EQ( label_dims[1], 1, - platform::errors::InvalidArgument( + 
phi::errors::InvalidArgument( "The last dimension of Input(Label) must be 1, but received %d.", label_dims[1])); } @@ -72,22 +72,22 @@ class RankLossOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( left_dims.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension size of Input(Left) must be greater than " "or equal to 1, but received %d.", left_dims.size())); PADDLE_ENFORCE_LE( left_dims.size(), 2, - platform::errors::InvalidArgument("The dimension size of Input(Left) " - "must be less than or equal to 2, " - "but received %d.", - left_dims.size())); + phi::errors::InvalidArgument("The dimension size of Input(Left) " + "must be less than or equal to 2, " + "but received %d.", + left_dims.size())); if (left_dims.size() == 2U) { PADDLE_ENFORCE_EQ( left_dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last dimension of Input(Left) must be 1, but received %d.", left_dims[1])); } @@ -95,29 +95,29 @@ class RankLossOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( right_dims.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension size of Input(Right) must be greater than " "or equal to 1, but received %d.", right_dims.size())); PADDLE_ENFORCE_LE( right_dims.size(), 2, - platform::errors::InvalidArgument("The dimension size of Input(Right) " - "must be less than or equal to 2, " - "but received %d.", - right_dims.size())); + phi::errors::InvalidArgument("The dimension size of Input(Right) " + "must be less than or equal to 2, " + "but received %d.", + right_dims.size())); if (right_dims.size() == 2U) { PADDLE_ENFORCE_EQ( right_dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last dimension of Input(Right) must be 1, but received %d.", right_dims[1])); } PADDLE_ENFORCE_EQ( label_dims[0], left_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of 
Input(Label) and Input(Left) " "must have the same value. But received Label.dims[0]=%d, " "Left.dims[0]=%d.", @@ -126,7 +126,7 @@ class RankLossOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( label_dims[0], right_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(Label) and Input(Right) " "must have the same value. But received Label.dims[0]=%d, " "Right.dims[0]=%d.", diff --git a/paddle/fluid/operators/reader/blocking_queue.h b/paddle/fluid/operators/reader/blocking_queue.h index 4d3e79546fbef..35e76304b6c32 100644 --- a/paddle/fluid/operators/reader/blocking_queue.h +++ b/paddle/fluid/operators/reader/blocking_queue.h @@ -36,7 +36,7 @@ class BlockingQueue { : capacity_(capacity), speed_test_mode_(speed_test_mode) { PADDLE_ENFORCE_GT(capacity_, static_cast(0), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The capacity of a reader::BlockingQueue must be " "greater than 0, but received capacity is %d.", capacity_)); @@ -59,7 +59,7 @@ class BlockingQueue { PADDLE_ENFORCE_LT( queue_.size(), capacity_, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "The queue size cannot exceed the set queue capacity. Expected " "queue size is less than %d. But received %d", capacity_, @@ -86,7 +86,7 @@ class BlockingQueue { PADDLE_ENFORCE_LT( queue_.size(), capacity_, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "The queue size cannot exceed the set queue capacity. Expected " "queue size is less than %d. 
But received %d", capacity_, @@ -104,7 +104,7 @@ class BlockingQueue { if (!queue_.empty()) { PADDLE_ENFORCE_NOT_NULL( elem, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The holder to receive queue data is null pointer.")); *elem = queue_.front(); if (LIKELY(!speed_test_mode_)) { @@ -115,7 +115,7 @@ class BlockingQueue { } else { PADDLE_ENFORCE_EQ(closed_, true, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "Blocking queue status error, if queue is empty " "when pop data, it should be closed.")); VLOG(3) << "queue is closed! return nothing."; @@ -168,11 +168,10 @@ class BlockingQueue { private: inline void EnforceNotKilled() { - PADDLE_ENFORCE_NE( - killed_, - true, - platform::errors::Fatal("Blocking queue is killed because the " - "data reader raises an exception.")); + PADDLE_ENFORCE_NE(killed_, + true, + phi::errors::Fatal("Blocking queue is killed because the " + "data reader raises an exception.")); } private: diff --git a/paddle/fluid/operators/reader/buffered_reader.cc b/paddle/fluid/operators/reader/buffered_reader.cc index cc5034c86f90f..15bbc9ff10965 100644 --- a/paddle/fluid/operators/reader/buffered_reader.cc +++ b/paddle/fluid/operators/reader/buffered_reader.cc @@ -127,7 +127,7 @@ void BufferedReader::ReadAsync(size_t i) { PADDLE_ENFORCE_EQ( cuda.size(), cpu.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input tensor number on GPU and CPU devices are not matched.")); } if (pin_memory_) { @@ -250,7 +250,7 @@ void BufferedReader::ReadAsync(size_t i) { PADDLE_ENFORCE_EQ( xpu.size(), cpu.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input tensor number on XPU and CPU devices are not matched. 
" "The number on XPU is %d, on CPU is %d", xpu.size(), @@ -308,7 +308,7 @@ void BufferedReader::ReadAsync(size_t i) { } else { PADDLE_ENFORCE_EQ(custom_device.size(), cpu.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input tensor number on CustomDevice and CPU " "devices are not matched. " "The number on CustomDevice is %d, on CPU is %d", diff --git a/paddle/fluid/operators/reader/create_ctr_reader_op.cc b/paddle/fluid/operators/reader/create_ctr_reader_op.cc index 8c38aaf528da0..de0dff6be2533 100644 --- a/paddle/fluid/operators/reader/create_ctr_reader_op.cc +++ b/paddle/fluid/operators/reader/create_ctr_reader_op.cc @@ -35,7 +35,7 @@ class CreateCTRReaderOp : public framework::OperatorBase { auto* queue_holder_var = scope.FindVar(queue_name); PADDLE_ENFORCE_NOT_NULL( queue_holder_var, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "No LoDTensorBlockingQueueHolder variable with name %s found", queue_name)); auto* queue_holder = diff --git a/paddle/fluid/operators/reader/create_custom_reader_op.cc b/paddle/fluid/operators/reader/create_custom_reader_op.cc index 43fb5d9059c15..6a18e417a39bb 100644 --- a/paddle/fluid/operators/reader/create_custom_reader_op.cc +++ b/paddle/fluid/operators/reader/create_custom_reader_op.cc @@ -100,12 +100,12 @@ class CustomReaderInferShape : public framework::InferShapeBase { PADDLE_ENFORCE_NE( ctx->IsRuntime(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "'CustomReaderInferShape' should only be invoked during " "compile time.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, - platform::errors::NotFound( + phi::errors::NotFound( "The output decorated reader should not be null.")); const auto* sub_block = ctx->Attrs().Get("sub_block"); @@ -117,7 +117,7 @@ class CustomReaderInferShape : public framework::InferShapeBase { auto* sink_var = sub_block->FindVar(var_name); PADDLE_ENFORCE_NOT_NULL( sink_var, - platform::errors::NotFound( + 
phi::errors::NotFound( "The sink variable is not found in CustomReader.")); res_dims.emplace_back(sink_var->GetShape()); res_lod_levels.push_back(sink_var->GetLoDLevel()); @@ -135,7 +135,7 @@ class CustomReaderInferVarType : public framework::VarTypeInference { auto& out_var_name = ctx->Output("Out")[0]; PADDLE_ENFORCE_EQ(ctx->HasVar(out_var_name), true, - platform::errors::NotFound( + phi::errors::NotFound( "The output reader variable should not be null.")); ctx->SetType(out_var_name, framework::proto::VarType::READER); @@ -148,7 +148,7 @@ class CustomReaderInferVarType : public framework::VarTypeInference { framework::VarDesc* var = sub_block->FindVar(var_name); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound( + phi::errors::NotFound( "The sink variable is not found in CustomReader.")); res_data_types.emplace_back(var->GetDataType()); } @@ -167,7 +167,7 @@ void CustomReader::ReadNextImpl(paddle::framework::LoDTensorArray* out) { PADDLE_ENFORCE_EQ( source_var_names_.size(), underlying_outs.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of source_var_names(%d) and the size of " "underlying_outs(%d) are not consistent. 
Each feeding element " "must have its own source variable.", @@ -192,8 +192,8 @@ void CustomReader::ReadNextImpl(paddle::framework::LoDTensorArray* out) { auto* var = exe_scope->FindVar(sink_var_names_[i]); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound("The variable %s is not in current scope.", - sink_var_names_[i])); + phi::errors::NotFound("The variable %s is not in current scope.", + sink_var_names_[i])); const auto& tensor = var->Get(); framework::TensorCopySync(tensor, platform::CPUPlace(), &(*out)[i]); } diff --git a/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc b/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc index 5cea8f5963111..975a32e9ab496 100644 --- a/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc +++ b/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc @@ -35,9 +35,9 @@ class CreateDoubleBufferReaderOp : public framework::OperatorBase { dynamic_cast(out->Get().get()); PADDLE_ENFORCE_NOT_NULL( decorated_reader, - platform::errors::NotFound("The inited reader should be a " - "DecoratedReader when running " - "create_double_buffer_reader op.")); + phi::errors::NotFound("The inited reader should be a " + "DecoratedReader when running " + "create_double_buffer_reader op.")); if (decorated_reader->UnderlyingReader() == underlying_reader.Get()) { return; } diff --git a/paddle/fluid/operators/reader/create_py_reader_op.cc b/paddle/fluid/operators/reader/create_py_reader_op.cc index e9edce4423e26..c55e77fc14787 100644 --- a/paddle/fluid/operators/reader/create_py_reader_op.cc +++ b/paddle/fluid/operators/reader/create_py_reader_op.cc @@ -35,7 +35,7 @@ class CreatePyReaderOp : public framework::OperatorBase { auto* queue_holder_var = scope.FindVar(queue_name); PADDLE_ENFORCE_NOT_NULL( queue_holder_var, - platform::errors::NotFound( + phi::errors::NotFound( "No LoDTensorBlockingQueueHolder variable with name %s found. 
This " "may be because the DataLoader is defined in another Scope, " "which is different from the Scope when calling Executor.run.", diff --git a/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h b/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h index da265a6fce76d..208377937c130 100644 --- a/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h +++ b/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h @@ -92,15 +92,15 @@ class OrderedMultiDeviceLoDTensorBlockingQueue { std::lock_guard lock(init_mutex_); PADDLE_ENFORCE_GE(dev_cnt, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Device count to init " "OrderedMultiDeviceLoDTensorBlockingQueue" " must be larger than 1")); if (!queues_.empty()) { - PADDLE_ENFORCE_EQ(queues_.size(), - dev_cnt, - platform::errors::InvalidArgument( - "queues should be only inited once")); + PADDLE_ENFORCE_EQ( + queues_.size(), + dev_cnt, + phi::errors::InvalidArgument("queues should be only inited once")); return; } @@ -119,7 +119,7 @@ class OrderedMultiDeviceLoDTensorBlockingQueue { PADDLE_ENFORCE_LT( idx, queues_.size(), - platform::errors::OutOfRange("The queue index is out of range")); + phi::errors::OutOfRange("The queue index is out of range")); return queues_[idx]; } @@ -184,7 +184,7 @@ class OrderedMultiDeviceLoDTensorBlockingQueue { void EnforceIsInited() const { PADDLE_ENFORCE_EQ(queues_.empty(), false, - platform::errors::NotFound("queue has not been inited")); + phi::errors::NotFound("queue has not been inited")); } private: @@ -209,8 +209,8 @@ class LoDTensorBlockingQueueHolder { PADDLE_ENFORCE_EQ( queue_, nullptr, - platform::errors::AlreadyExists("LoDTensorBlockingQueueHolder::" - "InitOnce() can only be called once")); + phi::errors::AlreadyExists("LoDTensorBlockingQueueHolder::" + "InitOnce() can only be called once")); queue_ = std::make_unique(capacity, speed_test_mode); } @@ -228,7 +228,7 @@ class OrderedMultiDeviceLoDTensorBlockingQueueHolder { void InitOnce(size_t 
capacity, bool speed_test_mode = false) { PADDLE_ENFORCE_EQ(queue_, nullptr, - platform::errors::AlreadyExists( + phi::errors::AlreadyExists( "OrderedMultiDeviceLoDTensorBlockingQueueHolder::" "InitOnce() can only be called once")); queue_ = std::make_unique( diff --git a/paddle/fluid/operators/reader/py_reader.cc b/paddle/fluid/operators/reader/py_reader.cc index f0c0409a729a5..d71f4b9e9ca95 100644 --- a/paddle/fluid/operators/reader/py_reader.cc +++ b/paddle/fluid/operators/reader/py_reader.cc @@ -25,7 +25,7 @@ PyReader::PyReader( const std::vector& need_check_feed) : framework::FileReader(dims, var_types, need_check_feed) { PADDLE_ENFORCE_NOT_NULL(queue, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "LoDTensorBlockingQueue must not be null.")); queue_ = queue; } diff --git a/paddle/fluid/operators/reader/read_op.cc b/paddle/fluid/operators/reader/read_op.cc index 1c65669adc3a9..d88dfb4962a9c 100644 --- a/paddle/fluid/operators/reader/read_op.cc +++ b/paddle/fluid/operators/reader/read_op.cc @@ -51,7 +51,7 @@ class ReadInferShape : public framework::InferShapeBase { PADDLE_ENFORCE_EQ( reader_dims.size(), out_names.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The reader's dim number doesn't match the output number.")); ctx->SetOutputsDim("Out", reader_dims); auto in_desc = @@ -61,7 +61,7 @@ class ReadInferShape : public framework::InferShapeBase { PADDLE_ENFORCE_EQ( in_lod_levels.size(), out_var_ptrs.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "LoDLevels of Input(Reader) must be the same as the " "number of Outputs(Out).")); for (size_t i = 0; i < out_var_ptrs.size(); ++i) { @@ -82,7 +82,7 @@ class ReadInferVarType : public framework::StaticGraphVarTypeInference { auto dtypes = GetDataTypes(ctx, reader_name); PADDLE_ENFORCE_EQ(dtypes.size(), out_names.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of input reader's dtypes do not 
match " "the output variable number.")); for (size_t i = 0; i < dtypes.size(); ++i) { @@ -120,8 +120,8 @@ class ReadOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( ins.size(), out_arg_names.size(), - platform::errors::InvalidArgument("input data number and output data " - "number of read_op do not match")); + phi::errors::InvalidArgument("input data number and output data " + "number of read_op do not match")); const std::vector& shapes = reader->Shapes(); const std::vector& var_types = @@ -130,7 +130,7 @@ class ReadOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( out_arg_names.size(), need_check_feed.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output size of read_op and the number of fed " "variables of reader do not match. Received size of output is %d, " "number of fed variables of reader is %d", @@ -145,7 +145,7 @@ class ReadOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( DimensionIsCompatibleWith(shapes[i], in_dims), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The fed Variable %s should have dimensions = %d, " "shape = [%s], but received fed shape [%s]", out_arg_names[i], @@ -155,7 +155,7 @@ class ReadOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( framework::TransToProtoVarType(ins[i].dtype()), var_types[i], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The data type of fed Variable %s must be %s, but received %s", out_arg_names[i], var_types[i], diff --git a/paddle/fluid/operators/reader/reader_op_registry.cc b/paddle/fluid/operators/reader/reader_op_registry.cc index e62d728b6f017..9a1693c5061c7 100644 --- a/paddle/fluid/operators/reader/reader_op_registry.cc +++ b/paddle/fluid/operators/reader/reader_op_registry.cc @@ -69,13 +69,13 @@ void FileReaderInferShape::operator()(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_NE( ctx->IsRuntime(), true, - platform::errors::PreconditionNotMet("'FileReaderInferShape' should only " - 
"be invoked during compile time.")); + phi::errors::PreconditionNotMet("'FileReaderInferShape' should only " + "be invoked during compile time.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Out"), true, - platform::errors::NotFound("The output file reader should not be null.")); + phi::errors::NotFound("The output file reader should not be null.")); bool use_data_config = ctx->Attrs().Get("use_data_config"); if (use_data_config) { const auto shape_concat = @@ -88,7 +88,7 @@ void FileReaderInferShape::operator()(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( lod_levels.size(), shapes.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of 'lod_levels'(%d) doesn't match the number " "of 'shapes'(%d).", lod_levels.size(), @@ -97,16 +97,16 @@ void FileReaderInferShape::operator()(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( dtypes.size(), shapes.size(), - platform::errors::InvalidArgument("The number of 'dtypes'(%d) doesn't " - "match the number of 'shapes'(%d).", - dtypes.size(), - shapes.size())); + phi::errors::InvalidArgument("The number of 'dtypes'(%d) doesn't " + "match the number of 'shapes'(%d).", + dtypes.size(), + shapes.size())); const auto need_check_feed = ctx->Attrs().Get>("need_check_feed"); PADDLE_ENFORCE_EQ( need_check_feed.size(), shapes.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of 'need_check_feed'(%d) doesn't match the " "number of 'shapes'(%d).", need_check_feed.size(), @@ -127,18 +127,18 @@ void DecoratedReaderInferShape::operator()( PADDLE_ENFORCE_NE( ctx->IsRuntime(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "'DecoratedReaderInferShape' should only be invoked during " "compile time.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("UnderlyingReader"), - true, - platform::errors::NotFound( - "Input(UnderlyingReader) should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), - true, - 
platform::errors::NotFound( - "The output decorated reader should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("UnderlyingReader"), + true, + phi::errors::NotFound("Input(UnderlyingReader) should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasOutput("Out"), + true, + phi::errors::NotFound("The output decorated reader should not be null.")); ctx->SetReaderDims("Out", ctx->GetReaderDims("UnderlyingReader")); framework::VarDesc* in_reader = PADDLE_GET( diff --git a/paddle/fluid/operators/recurrent_op.cc b/paddle/fluid/operators/recurrent_op.cc index a5d4ce5e29828..42856d5b3c12a 100644 --- a/paddle/fluid/operators/recurrent_op.cc +++ b/paddle/fluid/operators/recurrent_op.cc @@ -77,10 +77,10 @@ StepScopes::StepScopes(const platform::DeviceContext &dev_ctx, is_train_(is_train), is_backward_(is_backward) { size_t num_step_scopes = is_train ? seq_len : 2; - PADDLE_ENFORCE_EQ(is_train || !is_backward, - true, - platform::errors::PreconditionNotMet( - "Cannot backward when is not training")); + PADDLE_ENFORCE_EQ( + is_train || !is_backward, + true, + phi::errors::PreconditionNotMet("Cannot backward when is not training")); if (!is_backward_) { ClearStepScopes(dev_ctx, const_cast(&parent), scopes); scopes->reserve(static_cast(num_step_scopes)); @@ -101,7 +101,7 @@ void StepScopes::BackwardNext(const platform::DeviceContext &dev_ctx, framework::Scope *parent_scope) { PADDLE_ENFORCE_EQ(is_backward_, true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Cannot get backward next scope when is forward")); if (counter_ + 2 == scopes_->size()) { parent_scope->DeleteScope((*scopes_)[counter_ + 1]); @@ -114,7 +114,7 @@ void StepScopes::BackwardNext(const platform::DeviceContext &dev_ctx, void StepScopes::ForwardNext() { PADDLE_ENFORCE_EQ(is_backward_, false, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Cannot get forward next scope when is backward")); ++counter_; } @@ -126,7 +126,7 @@ framework::Scope 
&StepScopes::GetScope(size_t scope_id) const { PADDLE_ENFORCE_LT( scope_id, scopes_->size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input scope_id is greater than scopes size in RecurrentOp")); return *(*scopes_)[scope_id]; } @@ -149,16 +149,16 @@ int64_t RecurrentBase::GetSequenceLength(const framework::Scope &scope) const { PADDLE_ENFORCE_EQ( all_inputs.empty(), false, - platform::errors::InvalidArgument("RecurrentOp gets empty input")); + phi::errors::InvalidArgument("RecurrentOp gets empty input")); for (auto &iname : all_inputs) { auto *var = scope.FindVar(iname); PADDLE_ENFORCE_NOT_NULL(var, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "RecurrentOp finds var %s is NULL", iname)); PADDLE_ENFORCE_EQ( var->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "RecurrentOp only accepts phi::DenseTensor as input but " "input var %s is not phi::DenseTensor", iname)); @@ -168,7 +168,7 @@ int64_t RecurrentBase::GetSequenceLength(const framework::Scope &scope) const { } else { PADDLE_ENFORCE_EQ(seq_len, dim[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Sequence length of input %s in RecurrentOp is NOT " "equal to sequence length of previous input", iname)); @@ -176,7 +176,7 @@ int64_t RecurrentBase::GetSequenceLength(const framework::Scope &scope) const { } PADDLE_ENFORCE_GE(seq_len, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "RecurrentOp gets invalid sequence length. Expected " "seq_len >= 0. Received seq_len = %d", seq_len)); @@ -331,9 +331,9 @@ StepScopes RecurrentOp::CreateStepScopes(const platform::DeviceContext &dev_ctx, // fault in multithreading in eval process. The performance drop of // adding mutex need to be fixed. 
auto *var = scope.FindVar(Output(kStepScopes)); - PADDLE_ENFORCE_NOT_NULL(var, - platform::errors::InvalidArgument( - "RecurrentOp gets empty StepScopes var")); + PADDLE_ENFORCE_NOT_NULL( + var, + phi::errors::InvalidArgument("RecurrentOp gets empty StepScopes var")); return StepScopes(dev_ctx, scope, var->GetMutable(), @@ -413,7 +413,7 @@ void RecurrentGradOp::RunImpl(const framework::Scope &scope, PADDLE_ENFORCE_EQ(ex_state_grads.size(), cur_state_grads.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "lengths of ex_states and cur_states are not " "equal in RecurrentGradOp")); for (size_t i = 0; i < ex_state_grads.size(); ++i) { @@ -475,7 +475,7 @@ void RecurrentGradOp::RunImpl(const framework::Scope &scope, auto &p_names = Inputs(kParameters); PADDLE_ENFORCE_EQ(pg_names.size(), p_names.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Sizes of Parameters and ParamGrads are not equal " "in RecurrentGradOp")); @@ -566,7 +566,7 @@ void RecurrentGradOp::RunImpl(const framework::Scope &scope, // Delete the scope of StepScopes auto *var = scope.FindVar(Input(kStepScopes)); PADDLE_ENFORCE_NOT_NULL(var, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "StepScopes var is empty in RecurrentGradOp")); auto *step_scopes = var->GetMutable(); ClearStepScopes(dev_ctx, const_cast(&scope), step_scopes); @@ -578,7 +578,7 @@ StepScopes RecurrentGradOp::CreateStepScopes( size_t seq_len) const { auto *var = scope.FindVar(Input(kStepScopes)); PADDLE_ENFORCE_NOT_NULL(var, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "StepScopes var is empty in RecurrentGradOp")); return StepScopes(dev_ctx, scope, @@ -735,27 +735,27 @@ class RecurrentGradOpShapeInference : public framework::InferShapeBase { .Get>(RecurrentBase::kExStates) .size(), 0, - platform::errors::InvalidArgument("The Attr(%s) should be empty.", - RecurrentBase::kExStates)); + phi::errors::InvalidArgument("The Attr(%s) should be 
empty.", + RecurrentBase::kExStates)); PADDLE_ENFORCE_EQ( ctx->Attrs() .Get>(RecurrentBase::kStates) .size(), 0, - platform::errors::InvalidArgument("The Attr(%s) should be empty.", - RecurrentBase::kStates)); + phi::errors::InvalidArgument("The Attr(%s) should be empty.", + RecurrentBase::kStates)); } PADDLE_ENFORCE_EQ( ctx->HasInputs(RecurrentBase::kInputs), true, - platform::errors::InvalidArgument("The input(%s) should not be empty.", - RecurrentBase::kInputs)); + phi::errors::InvalidArgument("The input(%s) should not be empty.", + RecurrentBase::kInputs)); PADDLE_ENFORCE_EQ( ctx->HasInputs(RecurrentBase::kOutputs), true, - platform::errors::InvalidArgument("The input(%s) should not be empty.", - RecurrentBase::kOutputs)); + phi::errors::InvalidArgument("The input(%s) should not be empty.", + RecurrentBase::kOutputs)); // In some case the kInitialStates is empty. if (ctx->HasInputs(RecurrentBase::kInitialStates) && @@ -769,7 +769,7 @@ class RecurrentGradOpShapeInference : public framework::InferShapeBase { ctx->HasOutputs(framework::GradVarName(RecurrentBase::kInputs), /*allow_null=*/true), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The output of(%s) should not be empty.", framework::GradVarName(RecurrentBase::kInputs))); ctx->SetOutputsDim(framework::GradVarName(RecurrentBase::kInputs), @@ -780,7 +780,7 @@ class RecurrentGradOpShapeInference : public framework::InferShapeBase { PADDLE_ENFORCE_EQ( ctx->HasOutputs(framework::GradVarName(RecurrentBase::kParameters)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The output of(%s) should not be empty.", framework::GradVarName(RecurrentBase::kParameters))); ctx->SetOutputsDim(framework::GradVarName(RecurrentBase::kParameters), diff --git a/paddle/fluid/operators/recurrent_op.h b/paddle/fluid/operators/recurrent_op.h index d027205429513..b1be9a5c0389e 100644 --- a/paddle/fluid/operators/recurrent_op.h +++ b/paddle/fluid/operators/recurrent_op.h @@ -122,7 
+122,7 @@ class RecurrentBase : public framework::OperatorBase { bool is_backward = false) { PADDLE_ENFORCE_EQ(src_vars.size(), dst_vars.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Sizes of source vars and destination vars are not " "equal in LinkTensor.")); for (size_t i = 0; i < dst_vars.size(); ++i) { @@ -148,7 +148,7 @@ class RecurrentBase : public framework::OperatorBase { bool is_backward = false) { PADDLE_ENFORCE_EQ(src_vars.size(), dst_vars.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Sizes of source vars and destination vars are not " "equal in LinkTensor.")); for (size_t i = 0; i < dst_vars.size(); ++i) { @@ -180,8 +180,8 @@ class RecurrentBase : public framework::OperatorBase { } PADDLE_ENFORCE_NOT_NULL( src_var, - platform::errors::NotFound("Source variable %s is not found.", - src_var_name)); + phi::errors::NotFound("Source variable %s is not found.", + src_var_name)); auto &src_tensor = src_var->Get(); auto *dst_var = dst_scope->Var(dst_var_name); @@ -203,13 +203,13 @@ class RecurrentBase : public framework::OperatorBase { auto *src_var = src_scope.FindVar(src_var_name); PADDLE_ENFORCE_NOT_NULL( src_var, - platform::errors::NotFound("Source variable %s is not found.", - src_var_name)); + phi::errors::NotFound("Source variable %s is not found.", + src_var_name)); auto &src_tensor = src_var->Get(); PADDLE_ENFORCE_NOT_NULL( dst_var, - platform::errors::NotFound("Destination variable %s is not found.", - src_var_name)); + phi::errors::NotFound("Destination variable %s is not found.", + src_var_name)); auto *dst_tensor = dst_var->GetMutable(); callback(src_tensor, dst_tensor); } diff --git a/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc b/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc index b23fee1a012df..a7776609b79b8 100644 --- a/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc +++ b/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc @@ -57,12 +57,12 @@ class 
XPULogsumexpKernel : public framework::OpKernel { auto& dev_ctx = context.template device_context(); int r = xpu::logsumexp( dev_ctx.x_context(), input_data, output_data, xdims, axis_shape); - PADDLE_ENFORCE_EQ(r, - xpu::Error_t::SUCCESS, - platform::errors::External( - "XPU logsumexp kernel error! error value[%d %]", - r, - XPUAPIErrorMsg[r])); + PADDLE_ENFORCE_EQ( + r, + xpu::Error_t::SUCCESS, + phi::errors::External("XPU logsumexp kernel error! error value[%d %]", + r, + XPUAPIErrorMsg[r])); } }; diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op.h b/paddle/fluid/operators/reduce_ops/reduce_mean_op.h index 017fab6308821..eb82be83ba517 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_mean_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op.h @@ -62,7 +62,7 @@ struct FP16MeanGradFunctor { int size) { dx->device(place) = (dy->template cast().broadcast(dim) / dx->template cast().constant(size)) - .template cast(); + .template cast(); } }; diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h index 428c8d2c9a02c..2e14acddc1485 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op.h @@ -87,7 +87,7 @@ static inline std::vector GetReduceDim(const std::vector& dims, for (auto e : dims) { PADDLE_ENFORCE_LT(e, dim_size, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ReduceBaseOp: invalid axis, when x_dims is %d, " "axis[i] should less than x_dims, but got %d.", dim_size, @@ -511,7 +511,7 @@ class ReduceBaseOp : public framework::OperatorWithKernel { auto dims = ctx->Attrs().Get>("dim"); PADDLE_ENFORCE_GT(dims.size(), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input dim dimensions of ReduceBaseOp " "should be greater than 0. 
But received the dim " "dimensions of Reduce = %d.", @@ -521,7 +521,7 @@ class ReduceBaseOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LT( dims[i], x_rank, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The reduce dim index %d should be in the " "range [-dimension(X), dimension(X)] " "which dimension = %d. But received dim index = %d.", @@ -531,7 +531,7 @@ class ReduceBaseOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( dims[i], -x_rank, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The reduce dim index %d should be in the " "range [-dimension(X), dimension(X)] " "which dimension = %d. But received dim index = %d.", @@ -628,7 +628,7 @@ class ReduceBaseOp : public framework::OperatorWithKernel { platform::is_xpu_place(ctx.GetPlace()) || platform::is_custom_place(ctx.GetPlace()), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "float16 can only be used on GPU or XPU place")); } return phi::KernelKey(input_data_type, ctx.GetPlace()); @@ -670,7 +670,7 @@ class ReduceGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LT( dims[i], x_rank, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The reduce dim index %d should be in the " "range [-dimension(X), dimension(X)], " "which dimension = %d. 
But received dim index = %d.", diff --git a/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h b/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h index 35cc8fea6d0ba..31279af17e176 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h @@ -34,10 +34,9 @@ void XPUReduce(const framework::ExecutionContext& context, T*, const std::vector&, const std::vector&)> func) { - PADDLE_ENFORCE_EQ( - platform::is_xpu_place(context.GetPlace()), - true, - platform::errors::Unavailable("This kernel only runs on XPU.")); + PADDLE_ENFORCE_EQ(platform::is_xpu_place(context.GetPlace()), + true, + phi::errors::Unavailable("This kernel only runs on XPU.")); bool reduce_all = context.Attr("reduce_all"); auto dims = context.Attr>("dim"); auto* x = context.Input("X"); @@ -48,7 +47,7 @@ void XPUReduce(const framework::ExecutionContext& context, int out_dtype = context.Attr("out_dtype"); PADDLE_ENFORCE_EQ(out_dtype == -1, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "XPU only support out_dtype == -1 in reduce op.")); const auto* x_data = x->data(); @@ -88,16 +87,16 @@ void XPUReduce(const framework::ExecutionContext& context, dev_ctx.x_context(), x_data, y_data, x->numel() * sizeof(T)); PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true, - platform::errors::External("XPU copy in reduce op return " - "wrong value[%d %s].", - r, - XPUAPIErrorMsg[r])); + phi::errors::External("XPU copy in reduce op return " + "wrong value[%d %s].", + r, + XPUAPIErrorMsg[r])); } else { int r = func(dev_ctx.x_context(), x_data, y_data, xdims, reduce_dims); PADDLE_ENFORCE_EQ( r == xpu::Error_t::SUCCESS, true, - platform::errors::External( + phi::errors::External( "XPU reduce op return wrong value[%d %s].", r, XPUAPIErrorMsg[r])); } } diff --git a/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc b/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc index 319fad9b39231..5ce59fc54d6a6 100644 --- 
a/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc +++ b/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc @@ -210,10 +210,10 @@ class ReorderLoDTensorByRankTableOp : public ReorderLoDTensorByRankTableBase { size_t out_offset = 0; out->mutable_lod()->clear(); for (auto &item : rank_table.items()) { - PADDLE_ENFORCE_LT(item.index, - absolute_table.size(), - platform::errors::OutOfRange( - "The value of rank_table is out of range.")); + PADDLE_ENFORCE_LT( + item.index, + absolute_table.size(), + phi::errors::OutOfRange("The value of rank_table is out of range.")); out_offset = CopyTensorAndLod( place, absolute_table[item.index], x, out, out_offset); } diff --git a/paddle/fluid/operators/repeat_interleave_op.cc b/paddle/fluid/operators/repeat_interleave_op.cc index d0af82510bdc4..e276ef2082fb6 100644 --- a/paddle/fluid/operators/repeat_interleave_op.cc +++ b/paddle/fluid/operators/repeat_interleave_op.cc @@ -29,12 +29,12 @@ class RepeatInterleaveOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of RepeatInterleaveOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of RepeatInterleaveOp should not be null.")); auto input_dim = ctx->GetInputDim("X"); @@ -43,7 +43,7 @@ class RepeatInterleaveOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( dim < input_dim.size() && dim >= (0 - input_dim.size()), true, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "Attr(dim) is out of range, It's expected " "to be in range of [-%d, %d]. 
But received Attr(dim) = %d.", input_dim.size(), @@ -58,7 +58,7 @@ class RepeatInterleaveOp : public framework::OperatorWithKernel { repeats_dim.size() == 1 || (repeats_dim.size() == 2 && repeats_dim[1] == 1), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 'shape' of Input(RepeatsTensor) must be 1-D tensor. " "But received: the 'shape' of Input(Index) is [%s], " "the dimension of Input(Index) is [%d].", @@ -67,7 +67,7 @@ class RepeatInterleaveOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(repeats_dim[0] != 0, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The length of Input(RepeatsTensor) can't be 0.")); if (dim < 0) { @@ -98,14 +98,14 @@ class RepeatInterleaveGradOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), - true, - platform::errors::InvalidArgument( - "Input(Out@GRAD) should be not null.")); - PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("X")), - true, - platform::errors::InvalidArgument( - "Output(X@GRAD) should be not null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput(framework::GradVarName("Out")), + true, + phi::errors::InvalidArgument("Input(Out@GRAD) should be not null.")); + PADDLE_ENFORCE_EQ( + ctx->HasOutput(framework::GradVarName("X")), + true, + phi::errors::InvalidArgument("Output(X@GRAD) should be not null.")); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); } diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc index 34d80604ae8b0..d984edc4c4172 100644 --- a/paddle/fluid/operators/reshape_op.cc +++ b/paddle/fluid/operators/reshape_op.cc @@ -52,11 +52,11 @@ class ReshapeOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), 
true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of ReshapeOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of ReshapeOp should not be null.")); if (ctx->IsRuntime()) { @@ -76,7 +76,7 @@ class ReshapeOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( ShapeTensor.size(), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "When `shape` in ReshapeOp is a list or tuple " "which contains Tensor, the shape's size can't be zero. " "But received shape's size is %d.", @@ -89,7 +89,7 @@ class ReshapeOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LT( static_cast(i), in_dims.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The index of 0 in `shape` must be less than " "the input tensor X's dimensions. But received shape[%d] " "= 0, X's dimensions = %d, X's shape = [%s].", @@ -155,7 +155,7 @@ class ReshapeOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( unk_dim_idx, -1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only one dimension value of 'shape' in ReshapeOp can " "be -1. But received shape = [%s], shape[%d] is also -1.", common::make_ddim(shape), @@ -165,7 +165,7 @@ class ReshapeOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LT( static_cast(i), in_dims.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The index of 0 in `shape` must be less than " "the input tensor X's dimensions. " "But received shape = [%s], shape[%d] = 0, X's shape = [%s], " @@ -178,7 +178,7 @@ class ReshapeOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( shape[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Each dimension value of 'shape' in ReshapeOp must not " "be negative except one unknown dimension. 
" "But received shape = [%s], shape[%d] = %d.", @@ -204,7 +204,7 @@ class ReshapeOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( output_shape[unk_dim_idx] * capacity, -in_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 'shape' attribute in ReshapeOp is invalid. " "The input tensor X'size must be divisible by known " "capacity of 'shape'. " @@ -222,7 +222,7 @@ class ReshapeOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( capacity, in_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 'shape' in ReshapeOp is invalid. " "The input tensor X'size must be equal to the capacity of " "'shape'. " @@ -242,7 +242,7 @@ class ReshapeOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LE( capacity, in_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 'shape' in ReshapeOp is invalid. " "The input tensor X's shape = [%s], X's capacity = %d." "But the target shape of Out is [%s], the " @@ -359,11 +359,11 @@ class ReshapeGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::InvalidArgument("Input(X) shouldn't be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), - true, - platform::errors::InvalidArgument( - "Input(Out@GRAD) shouldn't be null.")); + phi::errors::InvalidArgument("Input(X) shouldn't be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput(framework::GradVarName("Out")), + true, + phi::errors::InvalidArgument("Input(Out@GRAD) shouldn't be null.")); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); } @@ -613,11 +613,11 @@ class Reshape2GradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("XShape"), true, - platform::errors::InvalidArgument("Input(XShape) shouldn't be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), - true, - platform::errors::InvalidArgument( - "Input(Out@GRAD) shouldn't be 
null.")); + phi::errors::InvalidArgument("Input(XShape) shouldn't be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput(framework::GradVarName("Out")), + true, + phi::errors::InvalidArgument("Input(Out@GRAD) shouldn't be null.")); // Construct MetaTensor for InferMeta Func using CompatMetaTensor = framework::CompatMetaTensor; @@ -774,7 +774,7 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape, ops::ReshapeKernel, int64_t, ops::ReshapeKernel, - plat::float16, + phi::dtype::float16, ops::ReshapeKernel, plat::bfloat16, ops::ReshapeKernel); @@ -791,7 +791,7 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape_grad, ops::ReshapeGradKernel, uint8_t, ops::ReshapeGradKernel, - plat::float16, + phi::dtype::float16, ops::ReshapeGradKernel, plat::bfloat16, ops::ReshapeGradKernel); diff --git a/paddle/fluid/operators/row_conv_op.cc b/paddle/fluid/operators/row_conv_op.cc index 04633c9e8e5dd..38d77de90ace4 100644 --- a/paddle/fluid/operators/row_conv_op.cc +++ b/paddle/fluid/operators/row_conv_op.cc @@ -39,7 +39,7 @@ class RowConvOp : public framework::OperatorWithKernel { auto filter_dims = ctx->GetInputDim("Filter"); PADDLE_ENFORCE_EQ(filter_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Filter)'s dimensions should be 2. Received: " "Input(Filter)'s shape: [%s].", filter_dims)); diff --git a/paddle/fluid/operators/rrelu_op.cc b/paddle/fluid/operators/rrelu_op.cc index 53f6969695e8e..3111ad4e5015d 100644 --- a/paddle/fluid/operators/rrelu_op.cc +++ b/paddle/fluid/operators/rrelu_op.cc @@ -52,7 +52,7 @@ class RReluOpMaker : public framework::OpProtoAndCheckerMaker { .AddCustomChecker([](const float& lower) { PADDLE_ENFORCE_EQ(lower >= 0.0f && lower < 1.0f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'RRelu_lower' must be between 0.0 and 1.0.")); }); float defalut_upper = 1. 
/ 3.; @@ -61,7 +61,7 @@ class RReluOpMaker : public framework::OpProtoAndCheckerMaker { .AddCustomChecker([](const float& upper) { PADDLE_ENFORCE_EQ(upper > 0.0f && upper <= 1.0f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'RRelu_upper' must be between 0.0 and 1.0.")); }); AddComment(R"DOC( diff --git a/paddle/fluid/operators/run_program_op.cc b/paddle/fluid/operators/run_program_op.cc index ffb024d165d36..0dc2d8ea0e20d 100644 --- a/paddle/fluid/operators/run_program_op.cc +++ b/paddle/fluid/operators/run_program_op.cc @@ -24,13 +24,13 @@ class RunProgramOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInputs("X"), - true, - platform::errors::NotFound( - "Input(X) of RunProgramOp should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInputs("X"), + true, + phi::errors::NotFound("Input(X) of RunProgramOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasOutputs("Out"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(Out) of RunProgramOp should not be null.")); } @@ -173,12 +173,12 @@ class RunProgramGradOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInputs("X"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(X) of RunProgramGradOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInputs(framework::GradVarName("Out")), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(Out@GRAD) of RunProgramGradOp should not be null.")); // NOTE: The X@GRAD and Params@GRAD may not exist, // because they can be set stop_gradient = True diff --git a/paddle/fluid/operators/run_program_op.cu b/paddle/fluid/operators/run_program_op.cu index 9a2b6851a4c73..1d9011429577b 100644 --- a/paddle/fluid/operators/run_program_op.cu +++ 
b/paddle/fluid/operators/run_program_op.cu @@ -14,7 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/run_program_op.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace ops = paddle::operators; namespace plat = paddle::platform; diff --git a/paddle/fluid/operators/run_program_op.h b/paddle/fluid/operators/run_program_op.h index 6006d7556423c..895a99608c902 100644 --- a/paddle/fluid/operators/run_program_op.h +++ b/paddle/fluid/operators/run_program_op.h @@ -32,7 +32,7 @@ limitations under the License. */ #include "paddle/fluid/framework/var_type_traits.h" #include "paddle/fluid/framework/variable.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #include "paddle/fluid/operators/cuda_graph_with_in_out.h" @@ -58,7 +58,7 @@ static void CheckInputVarStatus(const Variable &var, const std::string &var_name) { PADDLE_ENFORCE_EQ(var.IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input variable %s of " "RunProgram(Grad)Op holds " "wrong type. Expect type is phi::DenseTensor, but " @@ -68,10 +68,10 @@ static void CheckInputVarStatus(const Variable &var, PADDLE_ENFORCE_EQ( var.Get().IsInitialized(), true, - platform::errors::InvalidArgument("The tensor in input variable %s of " - "RunProgram(Grad)Op " - "is not initialized.", - var_name)); + phi::errors::InvalidArgument("The tensor in input variable %s of " + "RunProgram(Grad)Op " + "is not initialized.", + var_name)); } static void CheckOutputVarStatus(const Variable &src_var, @@ -81,7 +81,7 @@ static void CheckOutputVarStatus(const Variable &src_var, PADDLE_ENFORCE_EQ( src_var.IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The output variable %s get from " "RunProgram(Grad)Op's internal scope holds " "wrong type. 
Expect type is phi::DenseTensor, but receive type is " @@ -90,7 +90,7 @@ static void CheckOutputVarStatus(const Variable &src_var, platform::demangle(framework::ToTypeName(src_var.Type())))); PADDLE_ENFORCE_EQ(src_var.Get().IsInitialized(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The tensor in output variable %s get from " "RunProgram(Grad)Op's internal " "scope is not initialized.", @@ -99,7 +99,7 @@ static void CheckOutputVarStatus(const Variable &src_var, PADDLE_ENFORCE_EQ( src_var.IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The output variable %s get from " "RunProgram(Grad)Op's internal scope holds " "wrong type. Expect type is SelectedRows, but receive type is %s.", @@ -107,14 +107,14 @@ static void CheckOutputVarStatus(const Variable &src_var, platform::demangle(framework::ToTypeName(src_var.Type())))); PADDLE_ENFORCE_EQ(src_var.Get().value().IsInitialized(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The tensor in output variable %s get from " "RunProgram(Grad)Op's " "internal scope is not initialized.", var_name)); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "The RunProgram(Grad)Op only support output " "variable of type phi::DenseTensor or SelectedRows, " "but received variable %s's type is %s", @@ -173,10 +173,10 @@ static void ShareVarsFromScope(const std::vector &vars, auto *var = scope->FindVar(var_names[i]); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound("The output variable %s is not in " - "RunProgram(Grad)Op'" - "s internal scope.", - var_names[i])); + phi::errors::NotFound("The output variable %s is not in " + "RunProgram(Grad)Op'" + "s internal scope.", + var_names[i])); CheckOutputVarStatus(*var, *vars[i], var_names[i]); VariableShare(*var, vars[i]); } @@ -312,14 +312,14 @@ class RunProgramOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( use_cuda_graph, 
true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "If not provide OutScope then must run under cuda graph mode.")); inner_scope = std::make_unique(); } else { PADDLE_ENFORCE_EQ( out_scope_vec->size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The OutScope of RunProgramGradOp should only hold one scope.")); } @@ -511,7 +511,7 @@ class RunProgramGradOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( out_scope_vec->size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The OutScope of RunProgramGradOp should only hold one scope.")); framework::Scope *global_inner_scope = out_scope_vec->front(); @@ -519,7 +519,7 @@ class RunProgramGradOpKernel : public framework::OpKernel { VLOG(2) << "The number of sub scopes before backward: " << sub_scope_num; PADDLE_ENFORCE_GT(sub_scope_num, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The OutScope of RunProgramGradOp should hold at " "least one sub scope.")); diff --git a/paddle/fluid/operators/sampling_id_op.cc b/paddle/fluid/operators/sampling_id_op.cc index 5df5270976ca4..0a06ccd407dfd 100644 --- a/paddle/fluid/operators/sampling_id_op.cc +++ b/paddle/fluid/operators/sampling_id_op.cc @@ -27,7 +27,7 @@ class SamplingIdOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LT( ctx->Attrs().Get("min"), ctx->Attrs().Get("max"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "min must less then max, but here min is %f, max is %f", ctx->Attrs().Get("min"), ctx->Attrs().Get("max"))); @@ -36,7 +36,7 @@ class SamplingIdOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( input_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X, Filter) should be 2-D tensor. 
But X dim is %d", input_dims.size())); diff --git a/paddle/fluid/operators/sampling_id_op.h b/paddle/fluid/operators/sampling_id_op.h index 730d84c2a651e..b26d1743b628e 100644 --- a/paddle/fluid/operators/sampling_id_op.h +++ b/paddle/fluid/operators/sampling_id_op.h @@ -38,13 +38,13 @@ class SamplingIdKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( batch_size, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "batch_size(dims[0]) must be nonnegative. but it is %d.", batch_size)); PADDLE_ENFORCE_GE( width, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "width(dims[1]) must be nonnegative. but it is %d.", width)); std::vector ins_vector; diff --git a/paddle/fluid/operators/save_combine_op.h b/paddle/fluid/operators/save_combine_op.h index f5c3fb9969f1e..60c844678924b 100644 --- a/paddle/fluid/operators/save_combine_op.h +++ b/paddle/fluid/operators/save_combine_op.h @@ -185,7 +185,7 @@ class SaveCombineOpKernel : public framework::OpKernel { PADDLE_ENFORCE_GT(inp_var_names.size(), 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of variables to be saved is %d, expect " "it to be greater than 0.", inp_var_names.size())); @@ -199,12 +199,12 @@ class SaveCombineOpKernel : public framework::OpKernel { for (size_t i = 0; i < inp_vars.size(); i++) { PADDLE_ENFORCE_NOT_NULL( inp_vars[i], - platform::errors::InvalidArgument( - "Cannot find variable %s to save.", inp_var_names[i])); + phi::errors::InvalidArgument("Cannot find variable %s to save.", + inp_var_names[i])); PADDLE_ENFORCE_EQ( inp_vars[i]->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "SaveCombine operator only supports saving " "phi::DenseTensor or Vocab variable, %s has wrong type.", inp_var_names[i])); @@ -222,12 +222,12 @@ class SaveCombineOpKernel : public framework::OpKernel { for (size_t i = 0; i < inp_vars.size(); i++) { PADDLE_ENFORCE_NOT_NULL( inp_vars[i], - 
platform::errors::InvalidArgument( - "Cannot find variable %s to save.", inp_var_names[i])); + phi::errors::InvalidArgument("Cannot find variable %s to save.", + inp_var_names[i])); PADDLE_ENFORCE_EQ( inp_vars[i]->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "SaveCombine operator only supports saving " "phi::DenseTensor or Vocab variable, %s has wrong type.", inp_var_names[i])); diff --git a/paddle/fluid/operators/search_compute.h b/paddle/fluid/operators/search_compute.h index abf8365182483..579f31fbcf388 100644 --- a/paddle/fluid/operators/search_compute.h +++ b/paddle/fluid/operators/search_compute.h @@ -145,7 +145,7 @@ inline void axpy(const T* x, T* y, size_t len, const T alpha) { } #elif defined(PADDLE_WITH_ARM) || defined(PADDLE_WITH_SW) || \ defined(PADDLE_WITH_MIPS) || defined(PADDLE_WITH_LOONGARCH) - PADDLE_THROW(platform::errors::Unimplemented("axpy is not supported")); + PADDLE_THROW(phi::errors::Unimplemented("axpy is not supported")); #else lll = len & ~SSE_CUT_LEN_MASK; __m128x mm_alpha = _mm_load1_px(&alpha); @@ -175,7 +175,7 @@ inline void axpy_noadd(const T* x, T* y, size_t len, const T alpha) { } #elif defined(PADDLE_WITH_ARM) || defined(PADDLE_WITH_SW) || \ defined(PADDLE_WITH_MIPS) || defined(PADDLE_WITH_LOONGARCH) - PADDLE_THROW(platform::errors::Unimplemented("axpy_noadd is not supported")); + PADDLE_THROW(phi::errors::Unimplemented("axpy_noadd is not supported")); #else lll = len & ~SSE_CUT_LEN_MASK; __m128x mm_alpha = _mm_load1_px(&alpha); @@ -194,7 +194,7 @@ inline void axpy_noadd(const int8_t* x, int8_t* y, size_t len, const float alpha) { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "int8_t input of axpy_noadd is not supported")); } diff --git a/paddle/fluid/operators/select_input_op.cc b/paddle/fluid/operators/select_input_op.cc index 3b00aab8c8e89..8383a8bec3bd3 100644 --- a/paddle/fluid/operators/select_input_op.cc +++ 
b/paddle/fluid/operators/select_input_op.cc @@ -43,7 +43,7 @@ class SelectInputOp : public framework::OperatorBase { PADDLE_ENFORCE_LT( output_branch, x_names.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input 'Mask' in SelectInputOp is invalid. " "'Mask' must be less than the size of input vector 'X'. " "But received Mask = %d, X's size = %d.", diff --git a/paddle/fluid/operators/select_op_helper.h b/paddle/fluid/operators/select_op_helper.h index 2b7f884f6170c..33c5879de71f2 100644 --- a/paddle/fluid/operators/select_op_helper.h +++ b/paddle/fluid/operators/select_op_helper.h @@ -28,7 +28,7 @@ namespace operators { inline int GetBranchNumber(const phi::DenseTensor &mask) { PADDLE_ENFORCE_EQ(mask.numel(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The numel of Input(Mask) in SelectInputOp or " "SelectOutputOp must be 1. " "But received %d, and it's shape is [%s].", @@ -43,7 +43,7 @@ inline int GetBranchNumber(const phi::DenseTensor &mask) { defined(PADDLE_WITH_CUSTOM_DEVICE) || defined(PADDLE_WITH_XPU) framework::TensorCopySync(mask, platform::CPUPlace(), cpu_mask.get()); #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "This version of PaddlePaddle does NOT support GPU, " "but got GPU tensor 'Mask' in SelectInputOp or SelectOutputOp. " "Please compile PaddlePaddle WITH_GPU first.")); diff --git a/paddle/fluid/operators/select_output_op.cc b/paddle/fluid/operators/select_output_op.cc index 623d1bb5c6ce9..8f61ef7bb712a 100644 --- a/paddle/fluid/operators/select_output_op.cc +++ b/paddle/fluid/operators/select_output_op.cc @@ -55,7 +55,7 @@ class SelectOutputOp : public framework::OperatorBase { PADDLE_ENFORCE_LT( output_branch, out_names.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input 'Mask' in SelectOutputOp is invalid. " "'Mask' must be less than the size of output vector 'Out'. 
" "But received Mask = %d, Out's size = %d.", diff --git a/paddle/fluid/operators/sequence_ops/sequence_concat_op.cc b/paddle/fluid/operators/sequence_ops/sequence_concat_op.cc index dd65162b3aad4..2be8656cefed3 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_concat_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_concat_op.cc @@ -45,17 +45,17 @@ class SequenceConcatOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( context->HasInputs("X"), true, - platform::errors::NotFound("SequenceConcatOp Input(X) of Sequence " - "Concat Op should not be null.")); + phi::errors::NotFound("SequenceConcatOp Input(X) of Sequence " + "Concat Op should not be null.")); PADDLE_ENFORCE_EQ( context->HasOutput("Out"), true, - platform::errors::NotFound("SequenceConcatOp Output(Out) of Sequence " - "Concat Op should not be null.")); + phi::errors::NotFound("SequenceConcatOp Output(Out) of Sequence " + "Concat Op should not be null.")); PADDLE_ENFORCE_GT(context->Inputs("X").size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of SequenceConcatOp inputs should be " "greater than 1. 
But " "the number of inputs we received is %d", @@ -72,7 +72,7 @@ class SequenceConcatOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_NE( x_dim[0], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dim of SequenceConcatOp inputs must not be 0.")); if (feature_size == 0) { feature_size = common::product(x_dim) / x_dim[0]; @@ -80,7 +80,7 @@ class SequenceConcatOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( feature_size, common::product(x_dim) / x_dim[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Each input of SequenceConcatOp inputs must have same feature " "size, But " "the feature size we received is %d, the feature size of 1st " diff --git a/paddle/fluid/operators/sequence_ops/sequence_concat_op.h b/paddle/fluid/operators/sequence_ops/sequence_concat_op.h index 463cadc3ce733..3c2ac70eed0fe 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_concat_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_concat_op.h @@ -53,9 +53,9 @@ inline std::vector> GetDataVectorSafely( std::vector> result; result.reserve(vec.size()); for (auto *ptr : vec) { - PADDLE_ENFORCE_NOT_NULL(ptr, - platform::errors::InvalidArgument( - "The input variable X contains nullptr.")); + PADDLE_ENFORCE_NOT_NULL( + ptr, + phi::errors::InvalidArgument("The input variable X contains nullptr.")); result.emplace_back(*ptr); } return result; @@ -75,14 +75,14 @@ class SeqConcatKernel : public framework::OpKernel { if (lod_size == 0) { PADDLE_ENFORCE_EQ(x.get().lod().empty(), false, - platform::errors::NotFound( + phi::errors::NotFound( "Input(X) Tensor of SequenceConcatOp does not " "contain LoD information.")); lod_size = x.get().lod()[0].size(); } else { PADDLE_ENFORCE_EQ(lod_size, x.get().lod()[0].size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The lod size of each input must be the same, " "But the lod size of input we received is %d, " "the first input is %d", @@ -93,7 +93,7 
@@ class SeqConcatKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( lod_size, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Each input must have sequence lod information. But we " "received input lod size is %d", lod_size)); @@ -116,7 +116,7 @@ class SeqConcatGradKernel : public framework::OpKernel { context.MultiOutput(framework::GradVarName("X")); PADDLE_ENFORCE_EQ(xs.size(), dxs.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input X and Output Grad X must be " "same, But the rank of Input X we received is %d, " "the rank of Output Grad X is %d", diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op.cc b/paddle/fluid/operators/sequence_ops/sequence_conv_op.cc index c94f57807cd52..24109e8ed4531 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_conv_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_conv_op.cc @@ -40,14 +40,14 @@ class SequenceConvOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->Attrs().Get("contextStride"), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Currently, SequenceConvOp only supports contextStride=1. But " "received contextStride = %u.", ctx->Attrs().Get("contextStride"))); PADDLE_ENFORCE_EQ( in_dims.size() == 2 && filter_dims.size() == 2, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X, Filter) should be 2-D tensor. But received Input(X): " "input rank %u, input shape [%s]; received Input(Filter): " "input rank %u, input shape [%s].", @@ -58,7 +58,7 @@ class SequenceConvOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( filter_dims[0], context_length * in_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Filter's height should be context_length * " "input_hidden_size. 
But received: filter's height = %d, " "context_length * input_hidden_size = %d.", @@ -82,13 +82,13 @@ class SequenceConvOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( start_length, false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "If context_start is 0 and context_length is 1, paddingTrainable " "should be false.")); PADDLE_ENFORCE_EQ( padding_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(PaddingData) should be 2-D tensor. But received: " "input rank %u, input shape [%s].", padding_dim.size(), @@ -96,14 +96,14 @@ class SequenceConvOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( padding_dim[0] == total_pad && padding_dim[1] == input_width, true, - platform::errors::InvalidArgument("Input(PaddingData)'s shape is not " - "consistent with 'context_start' " - "and 'context_length'. Received " - "Input(PaddingData): input rank " - "%u, " - "input shape [%s].", - padding_dim.size(), - padding_dim)); + phi::errors::InvalidArgument("Input(PaddingData)'s shape is not " + "consistent with 'context_start' " + "and 'context_length'. 
Received " + "Input(PaddingData): input rank " + "%u, " + "input shape [%s].", + padding_dim.size(), + padding_dim)); } in_dims[1] = filter_dims[1]; diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op.h b/paddle/fluid/operators/sequence_ops/sequence_conv_op.h index 347db6e37db10..b7820f5dda0a7 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_conv_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_conv_op.h @@ -39,13 +39,13 @@ class SequenceConvKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(in->lod().empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) phi::DenseTensor of SequenceConvOp " "does not contain LoD information.")); PADDLE_ENFORCE_EQ( in->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only support input sequence with lod level equal to 1 at " "present. But received: lod level %u.", in->lod().size())); @@ -107,7 +107,7 @@ class SequenceConvGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( in->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only support input sequence with lod level equal to 1 at " "present. 
But received: lod level %u.", in->lod().size())); diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc b/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc index 53fb13180c36a..94f65ecd1c6e4 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc @@ -37,13 +37,13 @@ class SequenceConvXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(in->lod().empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) phi::DenseTensor of SequenceConvOp " "does not contain LoD information.")); PADDLE_ENFORCE_EQ( in->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only support input sequence with lod level equal to 1 at " "present. But received: lod level %u.", in->lod().size())); @@ -51,19 +51,19 @@ class SequenceConvXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( padding_trainable, false, - platform::errors::InvalidArgument("Only support padding_trainable " - "equal false.")); + phi::errors::InvalidArgument("Only support padding_trainable " + "equal false.")); int up_pad = std::max(0, -context_start); int down_pad = std::max(0, context_start + context_length - 1); PADDLE_ENFORCE_EQ( up_pad, 2, - platform::errors::InvalidArgument("Only support up_pad equal 2.")); + phi::errors::InvalidArgument("Only support up_pad equal 2.")); PADDLE_ENFORCE_EQ( down_pad, 2, - platform::errors::InvalidArgument("Only support down_pad equal 2.")); + phi::errors::InvalidArgument("Only support down_pad equal 2.")); auto xpu_context = context.template device_context().x_context(); @@ -73,8 +73,8 @@ class SequenceConvXPUKernel : public framework::OpKernel { xpu::ctx_guard RAII_GUARD(xpu_context); int col_numel = col_shape[0] * col_shape[1]; T* col_data = RAII_GUARD.alloc_l3_or_gm(col_numel); - PADDLE_ENFORCE_NOT_NULL( - col_data, paddle::platform::errors::Fatal("XPU memory is not enough")); + 
PADDLE_ENFORCE_NOT_NULL(col_data, + phi::errors::Fatal("XPU memory is not enough")); auto lod_level_0 = in->lod()[0]; int lod_size = lod_level_0.size(); @@ -84,7 +84,7 @@ class SequenceConvXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_LE( lod_size, 257, - platform::errors::InvalidArgument("Only support batch size <= 256.")); + phi::errors::InvalidArgument("Only support batch size <= 256.")); std::vector cpu_lodx(lod_size); for (int i = 0; i < lod_size; i++) { @@ -113,7 +113,7 @@ class SequenceConvXPUKernel : public framework::OpKernel { int n = filter.dims()[1]; PADDLE_ENFORCE_EQ(k, k1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of FC in SequenceConvOp is invalid." "The k of matrix A is %d, k1 of matrix B is %d." "But expect k == k1", @@ -173,13 +173,13 @@ class SequenceConvGradXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(in->lod().empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) phi::DenseTensor of SequenceConvOp " "does not contain LoD information.")); PADDLE_ENFORCE_EQ( in->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only support input sequence with lod level equal to 1 at " "present. 
But received: lod level %u.", in->lod().size())); @@ -187,26 +187,26 @@ class SequenceConvGradXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( padding_trainable, false, - platform::errors::InvalidArgument("Only support padding_trainable " - "equal false.")); + phi::errors::InvalidArgument("Only support padding_trainable " + "equal false.")); int up_pad = std::max(0, -context_start); int down_pad = std::max(0, context_start + context_length - 1); PADDLE_ENFORCE_EQ( up_pad, 2, - platform::errors::InvalidArgument("Only support up_pad equal 2.")); + phi::errors::InvalidArgument("Only support up_pad equal 2.")); PADDLE_ENFORCE_EQ( down_pad, 2, - platform::errors::InvalidArgument("Only support down_pad equal 2.")); + phi::errors::InvalidArgument("Only support down_pad equal 2.")); auto lod_level_0 = in->lod()[0]; int lod_size = lod_level_0.size(); PADDLE_ENFORCE_LE( lod_size, 257, - platform::errors::InvalidArgument("Only support batch size <= 256.")); + phi::errors::InvalidArgument("Only support batch size <= 256.")); std::vector cpu_lodx(lod_size); for (int i = 0; i < lod_size; i++) { @@ -223,8 +223,8 @@ class SequenceConvGradXPUKernel : public framework::OpKernel { xpu::ctx_guard RAII_GUARD(xpu_context); int col_numel = col_shape[0] * col_shape[1]; T* col_data = RAII_GUARD.alloc_l3_or_gm(col_numel); - PADDLE_ENFORCE_NOT_NULL( - col_data, paddle::platform::errors::Fatal("XPU memory is not enough")); + PADDLE_ENFORCE_NOT_NULL(col_data, + phi::errors::Fatal("XPU memory is not enough")); if (in_g || filter_g) { bool trans_a = false; @@ -235,7 +235,7 @@ class SequenceConvGradXPUKernel : public framework::OpKernel { int k1 = filter->dims()[1]; PADDLE_ENFORCE_EQ(k, k1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of FC in SequenceConvGradOp is invalid." "The k of matrix A is %d, k1 of matrix B is %d." 
"But expect k == k1", @@ -273,10 +273,10 @@ class SequenceConvGradXPUKernel : public framework::OpKernel { } if (in_g) { - PADDLE_ENFORCE_LT(sequence_width, - 512, - platform::errors::InvalidArgument( - "Only support sequence_width < 512.")); + PADDLE_ENFORCE_LT( + sequence_width, + 512, + phi::errors::InvalidArgument("Only support sequence_width < 512.")); in_g->mutable_data(context.GetPlace()); in_g->set_lod(in->lod()); @@ -317,7 +317,7 @@ class SequenceConvGradXPUKernel : public framework::OpKernel { int n = out_g->dims()[1]; PADDLE_ENFORCE_EQ(k, k1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of FC in SequenceConvGradOp is invalid." "The k of matrix A is %d, k1 of matrix B is %d." "But expect k == k1", diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc index 9ff5f1f96f389..de2949b7f75d7 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc @@ -45,7 +45,7 @@ class SequenceEnumerateOpMaker : public framework::OpProtoAndCheckerMaker { .AddCustomChecker([](const int& win_size) { PADDLE_ENFORCE_GE(win_size, 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The window size should be not less than 2." "Received window size is %d", win_size)); diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu index 7884232e5b10f..7beadb26487eb 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu @@ -62,7 +62,7 @@ class SequenceEnumerateOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( static_cast(in_dims[0]), in_lod[0].back(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The actual input data's size mismatched with LoD information." 
"Received input data size is %d (actual) vs %d (loD information).", static_cast(in_dims[0]), diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h index c66f4065a58f1..9f89476d8642a 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h @@ -31,7 +31,7 @@ class SequenceEnumerateKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( in->lod().empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) phi::DenseTensor of SequenceEnumerateOp does not contain " "LoD information.")); @@ -40,7 +40,7 @@ class SequenceEnumerateKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( static_cast(in_dims[0]), lod0.back(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The actual input data's size mismatched with LoD information." "Received input data size is %d (actual) vs %d (loD information).", static_cast(in_dims[0]), @@ -48,13 +48,13 @@ class SequenceEnumerateKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( in_dims.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of SequenceEnumerate operator's rank should be 2." "Received %d instead.", in_dims.size())); PADDLE_ENFORCE_EQ(in_dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of SequenceEnumerate operator's 2nd " "dimension should be 1. 
Received %d instead.", in_dims[1])); diff --git a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc index 7f6eeff11b5be..03edbdc1a5d04 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc @@ -29,7 +29,7 @@ class SequenceEraseOp : public framework::OperatorWithKernel { auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE( x_dims.size() == 2 && x_dims[1] == 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of SequenceEraseOp should be a 2-D phi::DenseTensor " "with the 2nd dimension equal to 1," "but received size %d with the 2nd dimension %d.", diff --git a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu index bbc80587a9cf7..8b4b76a762d94 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu @@ -73,7 +73,7 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( lod[lod.size() - 1].back(), (size_t)in->numel(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The actual size mismatches with the LoD information.")); auto tokens = ctx.Attr>("tokens"); auto in_len = in->numel(); diff --git a/paddle/fluid/operators/sequence_ops/sequence_erase_op.h b/paddle/fluid/operators/sequence_ops/sequence_erase_op.h index 9a4aef1d93ab4..505c4245155ad 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_erase_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_erase_op.h @@ -32,11 +32,11 @@ class SequenceEraseKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( lod.empty(), false, - platform::errors::InvalidArgument("Input(X) Tensor of SequenceEraseOp " - "does not contain LoD information.")); + phi::errors::InvalidArgument("Input(X) Tensor of SequenceEraseOp " + "does not contain LoD information.")); 
PADDLE_ENFORCE_EQ(lod[lod.size() - 1].back(), static_cast(in->numel()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The actual input size %d mismatches with the LoD " "information size %d.", lod[lod.size() - 1].back(), diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc index 1f9fd565ca77c..86c08d79d0332 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc @@ -35,7 +35,7 @@ class SequenceExpandAsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE(x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Dimension number of Input(X) should be at least 2. " "But received X's dimensions = %d, X's shape = [%s].", x_dims.size(), @@ -52,14 +52,14 @@ class SequenceExpandAsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(y_lod.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Level number of Input(Y)'s lod should be 1. But " "received Y's lod level = %d.", y_lod.size())); PADDLE_ENFORCE_EQ(static_cast(x_dim[0]), y_lod[0].size() - 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(X) should be one " "less than the size of Input(Y)'s 0 level lod. 
But " "received X's shape[0] = %d, Y's lod[0].size = %d.", diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h index d9a1d419f5a9e..81076908a6b62 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h @@ -78,21 +78,21 @@ class SequenceExpandAsKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( y->lod().empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Y) of SequenceExpandAsOp has wrong LoD information. " "Expected Y's lod is not empty, but received empty lod.")); auto &y_lod = y->lod(); PADDLE_ENFORCE_EQ(y_lod.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Y) of SequenceExpandAsOp has wrong LoD " "information. Expected Y's lod level = 1, but " "received lod level = %d.", y_lod.size())); PADDLE_ENFORCE_GT(y_lod[0].size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Y) of SequenceExpandAsOp has wrong LoD " "information. Expected the size of Y's lod[0] > 1, " "but received lod[0].size = %d.", diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cc b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cc index e6a64be83473d..4e7deab77952a 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cc @@ -38,7 +38,7 @@ class SequenceExpandOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Dimension number of Input(X) should be at least 2. 
But " "received: input rank %u, input shape [%s].", x_dims.size(), @@ -55,14 +55,14 @@ class SequenceExpandOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LE(x_lod.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Level of Input(X)'s lod should not be " "greater than 1. But received: lod level %u.", x_lod.size())); PADDLE_ENFORCE_GT( y_lod.size(), 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Level of Input(Y)'s lod should be greater than 0. But " "received: lod level %u.", y_lod.size())); @@ -70,7 +70,7 @@ class SequenceExpandOp : public framework::OperatorWithKernel { ref_level == -1 || (ref_level >= 0 && ref_level < static_cast(y_lod.size())), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Invalid `ref_level`, which should be either equal to -1 " "or in [0, %d), but received `ref_level` = %u.", y_lod.size(), @@ -82,7 +82,7 @@ class SequenceExpandOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_lod[0].size(), y_lod[ref_level].size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Level number of Input(X)'s lod could be 0. Otherwise " "size of Input(X)'s first level lod should be equal to " "size of Input(Y)'s referred level lod. But received: " @@ -95,7 +95,7 @@ class SequenceExpandOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims[0], static_cast(y_lod[ref_level].size()) - 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "When Input(X)'s lod is null, the dims[0] of " "Input(X) should match the " "size of Input(Y)'s referred level lod. 
But received " diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h index 0f53249cfbc24..1204775c44226 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h @@ -96,7 +96,7 @@ class SequenceExpandKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( y_lod.empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Y) phi::DenseTensor of SequenceExpandOp does not contain " "LoD information.")); diff --git a/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc b/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc index a6cd59e44dff0..a123ae14f39b1 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc @@ -61,7 +61,7 @@ class SequenceMaskOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( v < 0 || v >= 1, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Attr(maxlen) must be less than 0 or larger than 1")); }); AddAttr("out_dtype", "Output data type"); diff --git a/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc b/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc index d033ac210c7c8..f65dc988bfebd 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc @@ -26,29 +26,29 @@ class SequencePadOp : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - platform::errors::NotFound( - "Input(X) of SequencePadOp should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("X"), + true, + phi::errors::NotFound("Input(X) of SequencePadOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("PadValue"), true, - platform::errors::NotFound( + phi::errors::NotFound( 
"Input(PadValue) of SequencePadOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(Out) of SequencePadOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Length"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(Length) of SequencePadOp should not be null.")); auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_GE(x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of SequencePadOp Input(X) can't be less " "than 2. But the rank we received is %d", x_dims.size())); @@ -59,7 +59,7 @@ class SequencePadOp : public framework::OperatorWithKernel { pad_value_dims == common::make_ddim({}) || pad_value_dims == time_step_dims, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The SequencePadOp Input(PadValue) must be a scalar or a tensor " "whose shape equals to time steps in sequences")); @@ -73,19 +73,19 @@ class SequencePadOp : public framework::OperatorWithKernel { const auto& x_lod = x_var->Get().lod(); PADDLE_ENFORCE_EQ(x_lod.empty(), false, - platform::errors::NotFound( + phi::errors::NotFound( "The SequencePadOp Input(X) must hold lod info.")); const auto& x_lod_0 = x_lod[0]; PADDLE_ENFORCE_GE( x_lod_0.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of SequencePadOp Input(X)'s lod info can't be less " "than 2. But the size we received is %d", x_lod_0.size())); PADDLE_ENFORCE_EQ(x_dims[0], static_cast(x_lod_0.back()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The SequencePadOp Input(X)'s lod info mismatches " "the actual tensor shape. 
The 1st dimension of " "Input(X)'s lod info is %d, the 1st dimension of " @@ -102,7 +102,7 @@ class SequencePadOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( padded_length, max_seq_len, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The SequencePadOp Attr(padded_length) should be greater than or " "equal to the " "length of the longest original sequence. But the padded_length " @@ -119,7 +119,7 @@ class SequencePadOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( ctx->GetLoDLevel("X"), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The LoD level of SequencePadOp Input(X) should be " "larger than 0. But the LoD level we received is %d", ctx->GetLoDLevel("X"))); @@ -233,12 +233,12 @@ class SequencePadGradOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(X) of SequencePadGradOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput(framework::GradVarName("Out")), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(Out@GRAD) of SequencePadGradOp should not be null.")); if (ctx->HasOutput(framework::GradVarName("X"))) { diff --git a/paddle/fluid/operators/sequence_ops/sequence_pad_op.h b/paddle/fluid/operators/sequence_ops/sequence_pad_op.h index d31611b94a658..dd15ff4c9935d 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_pad_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_pad_op.h @@ -36,7 +36,7 @@ class SequencePadOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(x->lod().empty(), false, - platform::errors::NotFound( + phi::errors::NotFound( "Input(X) phi::DenseTensor of SequencePadOp does not " "contain LoD information.")); diff --git a/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc b/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc index d616bca2c4e3b..2d58d2b32276f 
100644 --- a/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc @@ -31,10 +31,10 @@ class SequencePoolOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( in_lod_level, 0, - platform::errors::InvalidArgument("The LoD level of Input(X) should " - "be larger than 0, but received: " - "lod level %u.", - in_lod_level)); + phi::errors::InvalidArgument("The LoD level of Input(X) should " + "be larger than 0, but received: " + "lod level %u.", + in_lod_level)); ctx->SetLoDLevel("Out", in_lod_level - 1); } @@ -126,7 +126,7 @@ class SequencePoolGradOp : public framework::OperatorWithKernel { auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ(og_dims.size(), x_dims.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of output grad must equal to Input(X). But " "received: input rank %u, input shape [%s].", og_dims.size(), @@ -135,7 +135,7 @@ class SequencePoolGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( og_dims[i], x_dims[i], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension mismatch between Input(OUT@GRAD) and " "Input(X). 
Received Input(OUT@GRAD): input rank %u, " "input shape [%s]; received Input(X): input rank %u, " diff --git a/paddle/fluid/operators/sequence_ops/sequence_reshape_op.cc b/paddle/fluid/operators/sequence_ops/sequence_reshape_op.cc index 6e34f76fbd37d..23ce04ca74262 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_reshape_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_reshape_op.cc @@ -27,18 +27,18 @@ class SequenceReshapeOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(X) of SequenceReshapeOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Out"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(Out) of SequenceReshapeOp should not be null.")); auto x_dims = ctx->GetInputDim("X"); auto x_numel = product(x_dims); PADDLE_ENFORCE_EQ(x_dims.size(), 2U, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of SequenceReshapeOp Input(X) should be 2. 
" "But the rank we received is %d", x_dims.size())); @@ -105,12 +105,12 @@ class SequenceReshapeGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput(framework::GradVarName("Out")), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(Out@GRAD) of SequenceReshapeGradOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(X) of SequenceReshapeGradOp should not be null.")); ctx->ShareDim("X", /*->*/ framework::GradVarName("X")); diff --git a/paddle/fluid/operators/sequence_ops/sequence_reshape_op.h b/paddle/fluid/operators/sequence_ops/sequence_reshape_op.h index de530bed0d663..e506b310ea2bb 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_reshape_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_reshape_op.h @@ -32,14 +32,14 @@ class SequenceReshapeKernel : public framework::OpKernel { int64_t in_width = in_dims[1]; auto& in_lod = in->lod(); - PADDLE_ENFORCE_EQ(in_lod.empty(), - false, - platform::errors::NotFound( - "Input(X) Tensor of SequenceReshapeOp does not " - "contain LoD information.")); + PADDLE_ENFORCE_EQ( + in_lod.empty(), + false, + phi::errors::NotFound("Input(X) Tensor of SequenceReshapeOp does not " + "contain LoD information.")); PADDLE_ENFORCE_EQ(in_lod.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) Tensor of SequenceReshapeOp Only support " "one level sequence now. But lod size " "of Input(X) is %d", @@ -47,7 +47,7 @@ class SequenceReshapeKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( (uint64_t)in_dims[0], in_lod[0].back(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of SequenceReshapeOp X.shape[0] and X.lod()[0].back() " "should " "be same. 
But X.shape[0] = %d, X.lod()[0].back() = %d", @@ -71,7 +71,7 @@ class SequenceReshapeKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( offset * out_width, seq_len * in_width, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Please make sure (sequence_length * dimension) " "can be divided by context Attr(new_dim) with no remainder for " "each sequence. But the %dth sequence is invalid.", diff --git a/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h b/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h index 2236988025cbc..5b2d22218adf8 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h @@ -31,17 +31,17 @@ class SequenceReverseOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::NotFound("Input(X) of SequenceReverse must exist")); + phi::errors::NotFound("Input(X) of SequenceReverse must exist")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Y"), true, - platform::errors::NotFound("Output(Y) of SequenceReverse must exist")); + phi::errors::NotFound("Output(Y) of SequenceReverse must exist")); auto x_dim = ctx->GetInputDim("X"); PADDLE_ENFORCE_GE( x_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of SequenceReverseOp Input(X) must be greater " "than or equal to 2. 
But the Input(X) tensor's rank we received is " "%d", @@ -120,15 +120,15 @@ class SequenceReverseOpKernel : public framework::OpKernel { auto &x = *ctx.Input("X"); auto *y = ctx.Output("Y"); - PADDLE_ENFORCE_EQ(x.lod().empty(), - false, - platform::errors::NotFound( - "Input(X) Tensor of SequenceReverseOp does not " - "contain LoD information.")); + PADDLE_ENFORCE_EQ( + x.lod().empty(), + false, + phi::errors::NotFound("Input(X) Tensor of SequenceReverseOp does not " + "contain LoD information.")); PADDLE_ENFORCE_EQ(x.lod().size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "SequenceReverseOp only support one " "level lod. But the Input(X) lod size is %d", x.lod().size())); @@ -156,7 +156,7 @@ class SequenceReverseOpKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( x_data, y_data, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "SequenceReverse Op does not support in-place operation")); if (platform::is_cpu_place(ctx.GetPlace())) { diff --git a/paddle/fluid/operators/sequence_ops/sequence_scatter_op.cc b/paddle/fluid/operators/sequence_ops/sequence_scatter_op.cc index cf7e549134cd0..0e44a0cace16a 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_scatter_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_scatter_op.cc @@ -90,7 +90,7 @@ class SequenceScatterOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( updates_dim[0], ids_dim[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of SequenceScatter operator's input Updates and Ids do " "not match, receive Updates's shape is [%s], Ids's shape is [%s].", updates_dim, @@ -108,7 +108,7 @@ class SequenceScatterOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ids_lod.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The SequenceScatter operator’s Input Ids holds wrong LoD " "information. 
Currently SequenceScatter operator can only deal " "with one level LoD for input Ids, but received LoD level is %d.", @@ -116,7 +116,7 @@ class SequenceScatterOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( updates_lod.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The SequenceScatter operator’s Input Updates holds wrong LoD " "information. Currently SequenceScatter operator can only deal " "with one level LoD for input Updates, but received LoD level is " diff --git a/paddle/fluid/operators/sequence_ops/sequence_scatter_op.h b/paddle/fluid/operators/sequence_ops/sequence_scatter_op.h index 389b630015e6f..8c45879d57d72 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_scatter_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_scatter_op.h @@ -35,7 +35,7 @@ class SequenceScatterOpKernel : public framework::OpKernel { auto& ids_lod = ids->lod(); PADDLE_ENFORCE_EQ(ids_lod.empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Ids) Tensor of SequenceScatter operator does " "not contain LoD information.")); @@ -49,7 +49,7 @@ class SequenceScatterOpKernel : public framework::OpKernel { for (int i = 0; i < x_dims.size(); ++i) PADDLE_ENFORCE_EQ(x_dims[i], out_dims[i], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) and output(Out) shape of SequenceScatter " "operator do not match. Received input(X)'s shape " "is [%s], output(Out)'s shape is [%s].", @@ -65,12 +65,12 @@ class SequenceScatterOpKernel : public framework::OpKernel { PADDLE_ENFORCE_LT( seg, lod_vec.size() - 1, - platform::errors::OutOfRange("The segment index is out of bound in " - "SequenceScatter operator, it must be " - "less than batch size. The segment " - "index is %d, the batch size is %d.", - seg, - lod_vec.size())); + phi::errors::OutOfRange("The segment index is out of bound in " + "SequenceScatter operator, it must be " + "less than batch size. 
The segment " + "index is %d, the batch size is %d.", + seg, + lod_vec.size())); int lower_bound = lod_vec[seg]; int upper_bound = lod_vec[seg + 1]; if (i >= lower_bound && i < upper_bound) { @@ -93,9 +93,9 @@ class SequenceScatterGradientOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( platform::is_cpu_place(ctx.GetPlace()), true, - platform::errors::Unimplemented("Device dose not match. The " - "SequenceScatterGradientOpKernel can " - "only run on CPU device.")); + phi::errors::Unimplemented("Device dose not match. The " + "SequenceScatterGradientOpKernel can " + "only run on CPU device.")); auto* dX = ctx.Output(framework::GradVarName("X")); auto* dUpdates = ctx.Output(framework::GradVarName("Updates")); auto* ids = ctx.Input("Ids"); @@ -113,7 +113,7 @@ class SequenceScatterGradientOpKernel : public framework::OpKernel { for (int i = 0; i < dx_dims.size(); ++i) PADDLE_ENFORCE_EQ(dx_dims[i], dout_dims[i], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Out@GRAD) and output(X@GRAD) shape of " "SequenceScatterGradient operator do not match. " "Received input(Out@GRAD)'s shape is [%s], " @@ -131,7 +131,7 @@ class SequenceScatterGradientOpKernel : public framework::OpKernel { PADDLE_ENFORCE_LT( seg, lod_vec.size() - 1, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "The segment index is out of bound in SequenceScatterGradient " "operator, it must be less than batch size. 
The segment index is " "%d, the batch size is %d.", diff --git a/paddle/fluid/operators/sequence_ops/sequence_slice_op.cc b/paddle/fluid/operators/sequence_ops/sequence_slice_op.cc index ed6e53b9ca7e8..701727a2cf4ca 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_slice_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_slice_op.cc @@ -36,7 +36,7 @@ class SequenceSliceOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( offset_dim.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input Offset dimension error. SequenceSlice operator only support " "one level sequence now, the dimension of input Offset must be 2, " "but received dimension is %d.", @@ -44,7 +44,7 @@ class SequenceSliceOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( length_dim.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input Length dimension error. SequenceSlice operator only support " "one level sequence now, the dimension of input Length must be 2, " "but received dimension is %d.", diff --git a/paddle/fluid/operators/sequence_ops/sequence_slice_op.h b/paddle/fluid/operators/sequence_ops/sequence_slice_op.h index 50a3e97633475..ee826570b37e7 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_slice_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_slice_op.h @@ -52,14 +52,14 @@ class SequenceSliceOpKernel : public framework::OpKernel { auto lod = in->lod(); PADDLE_ENFORCE_EQ(lod.empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) Tensor of SequenceSlice operator does not " "contain LoD information.")); PADDLE_ENFORCE_EQ( lod.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "LoD information error. 
SequenceSlice operator only support one " "level sequence now, but received LoD level is %d.", lod.size())); @@ -67,7 +67,7 @@ class SequenceSliceOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( n, static_cast(length->dims()[0]), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input length shape error. The length of input LoD sequence and " "input length-array‘s first dimension should be equal, but the LoD " "sequence length is %d, the length-array‘s first dimension is %d.", @@ -76,7 +76,7 @@ class SequenceSliceOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( n, static_cast(offset->dims()[0]), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input offset shape error. The length of input LoD sequence and " "input offset-array‘s first dimension should be equal, but the LoD " "sequence length is %d, the offset-array‘s first dimension is %d.", @@ -101,14 +101,14 @@ class SequenceSliceOpKernel : public framework::OpKernel { for (size_t i = 0; i < n; ++i) { PADDLE_ENFORCE_LE(0, offset_data[i], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input offset[%d]'s value is negative, its " "value is %d, expect it to be non-negative.", i, offset_data[i])); PADDLE_ENFORCE_LE(0, length_data[i], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input length[%d]'s value is negative, its " "value is %d, expect it to be non-negative.", i, @@ -116,7 +116,7 @@ class SequenceSliceOpKernel : public framework::OpKernel { PADDLE_ENFORCE_LE( lod[0][i] + offset_data[i] + length_data[i], lod[0][i + 1], - platform::errors::OutOfRange( + phi::errors::OutOfRange( "The slice end index of target tensor is out of range. 
expect it " "less than or equal to %d, but the actual slice end index is %d.", lod[0][i + 1], diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc b/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc index 01f7bb3e92890..0a4d5a69a8e2b 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc @@ -36,7 +36,7 @@ class SequenceSoftmaxCUDNNKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dims[0], static_cast(lod[level].back()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(X) should be equal to the sum of all " "sequences' lengths. But received first dimension of Input(X) is " "%d, the sum of all sequences' lengths is %d.", @@ -44,7 +44,7 @@ class SequenceSoftmaxCUDNNKernel : public framework::OpKernel { static_cast(lod[level].back()))); PADDLE_ENFORCE_EQ(dims[0], x->numel(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The width of each timestep in Input(X) of " "SequenceSoftmaxOp should be 1.")); diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc index 12d4f72a91169..5fbbd49a88521 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc @@ -111,7 +111,7 @@ class SequenceSoftmaxGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( out_dim, out_grad_dim, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Input(Out) and Input(Out@GRAD) of " "SequenceSoftmaxGrad operator do not match. 
The Input(Out)'s shape " "is [%s], the Input(Out@GRAD)'s shape is [%s].", diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h index 6c6f1b69c8196..ee372e6a9d382 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h @@ -97,7 +97,7 @@ class SequenceSoftmaxKernel : public framework::OpKernel { auto dims = x->dims(); PADDLE_ENFORCE_EQ(lod.empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) phi::DenseTensor of SequenceSoftmax " "operator does not contain " "LoD information.")); @@ -106,7 +106,7 @@ class SequenceSoftmaxKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dims[0], static_cast(lod[level].back()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(X) should be equal to the sum of all " "sequences' lengths. But the first dimension of Input(X) is %d, " "the sum of all sequences' lengths is %d.", @@ -115,7 +115,7 @@ class SequenceSoftmaxKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dims[0], x->numel(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The width of each timestep in Input(X) of SequenceSoftmax " "operator should be 1. 
But the first dimension of Input(X) is %d, " "the number of elements is %d.", diff --git a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.cc b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.cc index 4b19faea335bf..5520cf3227d71 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.cc @@ -28,23 +28,23 @@ class SequenceUnpadOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(X) of SequenceUnpadOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("Length"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(Length) of SequenceUnpadOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Out"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(Out) of SequenceUnpadOp should not be null.")); auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_GE(x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(X) can't be less than 2. But the " "rank we received is %d", x_dims.size())); @@ -52,14 +52,14 @@ class SequenceUnpadOp : public framework::OperatorWithKernel { auto len_dims = ctx->GetInputDim("Length"); PADDLE_ENFORCE_EQ(len_dims.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of SequenceUnpadOp Input(Length) should " "be 1. But the rank we received is %d", len_dims.size())); PADDLE_ENFORCE_EQ( len_dims[0], x_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 1st dimension of SequenceUnpadOp Input(X) and Input(Length)" "should be same. 
But the 1st dimension of " "Input(X) is %d, Input(Length) is %d", @@ -142,12 +142,12 @@ class SequenceUnpadGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(X) of SequenceUnpadGradOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput(framework::GradVarName("Out")), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(Out@GRAD) of SequenceUnpadGradOp should not be null.")); if (ctx->HasOutput(framework::GradVarName("X"))) { diff --git a/paddle/fluid/operators/set_value_op.cc b/paddle/fluid/operators/set_value_op.cc index 5eeb356817a2a..268f7457f2136 100644 --- a/paddle/fluid/operators/set_value_op.cc +++ b/paddle/fluid/operators/set_value_op.cc @@ -189,7 +189,7 @@ class SetValueGrad : public framework::OperatorWithKernel { PADDLE_ENFORCE_LT( in_dims.size(), 7, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension of set_value_grad operator's input should be less " "than 7, but received dimension is %d.", in_dims.size())); diff --git a/paddle/fluid/operators/set_value_op.h b/paddle/fluid/operators/set_value_op.h index 7d5fd042bb0fb..85022ead0e905 100644 --- a/paddle/fluid/operators/set_value_op.h +++ b/paddle/fluid/operators/set_value_op.h @@ -22,10 +22,10 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/operators/assign_value_op.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" -#include "paddle/fluid/operators/utils.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/phi/core/tensor_utils.h" +#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/funcs/slice_utils.h" namespace paddle { @@ -68,7 +68,7 @@ inline void CheckIsDimsMatch(const framework::DDim first, return; } } - 
PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "The shape of tensor assigned value must match the shape " "of target shape: %d, but now shape is %d.", second.to_str(), diff --git a/paddle/fluid/operators/share_data_op.cc b/paddle/fluid/operators/share_data_op.cc index b780ccba920c0..4accee24e17fa 100644 --- a/paddle/fluid/operators/share_data_op.cc +++ b/paddle/fluid/operators/share_data_op.cc @@ -33,12 +33,12 @@ class ShareDataOp : public framework::OperatorWithKernel { in_type == framework::proto::VarType::LOD_TENSOR || in_type == framework::proto::VarType::SELECTED_ROWS, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Type of Variable[X] must be phi::DenseTensor or SelectedRows!")); PADDLE_ENFORCE_EQ( in_type, out_type, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The type of input (X) and output (Out) are inconsistent.")); ctx->ShareDim("X", "Out"); @@ -80,4 +80,4 @@ PD_REGISTER_STRUCT_KERNEL(share_data, int64_t, float, double, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/share_data_op.cu b/paddle/fluid/operators/share_data_op.cu index 7e67b491834ea..2b1c32d655b80 100644 --- a/paddle/fluid/operators/share_data_op.cu +++ b/paddle/fluid/operators/share_data_op.cu @@ -27,4 +27,4 @@ PD_REGISTER_STRUCT_KERNEL(share_data, int64_t, float, double, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/shrink_rnn_memory_op.cc b/paddle/fluid/operators/shrink_rnn_memory_op.cc index e883ba8e83092..ddcf20255efe1 100644 --- a/paddle/fluid/operators/shrink_rnn_memory_op.cc +++ b/paddle/fluid/operators/shrink_rnn_memory_op.cc @@ -40,15 +40,15 @@ class ShrinkRNNMemoryOp : public ArrayOp { void RunImpl(const framework::Scope &scope, const platform::Place &place) const override { auto *x_var = scope.FindVar(Input("X")); - PADDLE_ENFORCE_NOT_NULL(x_var, - platform::errors::NotFound( - "Input(X) of ShrinkRNNMemoryOp is 
not found.")); + PADDLE_ENFORCE_NOT_NULL( + x_var, + phi::errors::NotFound("Input(X) of ShrinkRNNMemoryOp is not found.")); auto &x_tensor = x_var->Get(); size_t offset = this->GetOffset(scope, place); auto *rank_table_var = scope.FindVar(Input("RankTable")); PADDLE_ENFORCE_NOT_NULL( rank_table_var, - platform::errors::NotFound( + phi::errors::NotFound( "Input(RankTable) of ShrinkRNNMemoryOp is not found.")); auto &rank_table = rank_table_var->Get(); @@ -64,7 +64,7 @@ class ShrinkRNNMemoryOp : public ArrayOp { auto *out_var = scope.FindVar(Output("Out")); PADDLE_ENFORCE_NOT_NULL( out_var, - platform::errors::NotFound( + phi::errors::NotFound( "Output(Out) of ShrinkRNNMemoryOp is not found.")); auto &out_tensor = *out_var->GetMutable(); @@ -148,12 +148,12 @@ class ShrinkRNNMemoryGradOp : public ArrayOp { auto *dx_var = scope.FindVar(Output(framework::GradVarName("X"))); PADDLE_ENFORCE_NOT_NULL( dx_var, - platform::errors::NotFound( + phi::errors::NotFound( "Input(X@GRAD) of ShrinkRNNMemoryGradOp is not found.")); auto *x_var = scope.FindVar(Input("X")); PADDLE_ENFORCE_NOT_NULL( x_var, - platform::errors::NotFound( + phi::errors::NotFound( "Input(x) of ShrinkRNNMemoryGradOp is not found.")); auto &x_tensor = x_var->Get(); auto &dx_tensor = *dx_var->GetMutable(); diff --git a/paddle/fluid/operators/shuffle_batch_op.cc b/paddle/fluid/operators/shuffle_batch_op.cc index 0b5a7bf5540ab..1f1415aa995fd 100644 --- a/paddle/fluid/operators/shuffle_batch_op.cc +++ b/paddle/fluid/operators/shuffle_batch_op.cc @@ -38,26 +38,23 @@ class ShuffleBatchOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("X"), - true, - platform::errors::NotFound("Input(X) should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("Seed"), - true, - platform::errors::NotFound("Input(Seed) should not be null.")); - PADDLE_ENFORCE_EQ( - 
ctx->HasOutput("Out"), - true, - platform::errors::NotFound("Output(Out) should not be null.")); + PADDLE_ENFORCE_EQ(ctx->HasInput("X"), + true, + phi::errors::NotFound("Input(X) should not be null.")); + PADDLE_ENFORCE_EQ(ctx->HasInput("Seed"), + true, + phi::errors::NotFound("Input(Seed) should not be null.")); + PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), + true, + phi::errors::NotFound("Output(Out) should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("ShuffleIdx"), true, - platform::errors::NotFound("Output(ShuffleIdx) should not be null.")); + phi::errors::NotFound("Output(ShuffleIdx) should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("SeedOut"), true, - platform::errors::NotFound("Output(SeedOut) should not be null.")); + phi::errors::NotFound("Output(SeedOut) should not be null.")); ctx->ShareDim("X", "Out"); ctx->ShareLoD("X", "Out"); @@ -122,15 +119,15 @@ class ShuffleBatchOpGrad : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("ShuffleIdx"), true, - platform::errors::NotFound("Input(ShuffleIdx) should not be null")); + phi::errors::NotFound("Input(ShuffleIdx) should not be null")); PADDLE_ENFORCE_EQ( ctx->HasInput(framework::GradVarName("Out")), true, - platform::errors::NotFound("Grad Input(Out) should not be null")); + phi::errors::NotFound("Grad Input(Out) should not be null")); PADDLE_ENFORCE_EQ( ctx->HasOutput(framework::GradVarName("X")), true, - platform::errors::NotFound("Grad Output(X) should not be null")); + phi::errors::NotFound("Grad Output(X) should not be null")); ctx->ShareDim(framework::GradVarName("Out"), framework::GradVarName("X")); ctx->ShareLoD(framework::GradVarName("Out"), framework::GradVarName("X")); diff --git a/paddle/fluid/operators/shuffle_channel_op.cc b/paddle/fluid/operators/shuffle_channel_op.cc index c8f9d9469848e..f95bed3bed5ef 100644 --- a/paddle/fluid/operators/shuffle_channel_op.cc +++ b/paddle/fluid/operators/shuffle_channel_op.cc @@ -29,7 +29,7 @@ class ShuffleChannelOp : public 
framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( input_dims.size(), 4, - platform::errors::InvalidArgument("The layout of input is NCHW.")); + phi::errors::InvalidArgument("The layout of input is NCHW.")); ctx->SetOutputDim("Out", input_dims); } @@ -55,10 +55,10 @@ class ShuffleChannelOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr("group", "the number of groups.") .SetDefault(1) .AddCustomChecker([](const int& group) { - PADDLE_ENFORCE_GE(group, - 1, - platform::errors::InvalidArgument( - "group should be larger than 0.")); + PADDLE_ENFORCE_GE( + group, + 1, + phi::errors::InvalidArgument("group should be larger than 0.")); }); AddComment(R"DOC( Shuffle Channel operator @@ -83,7 +83,7 @@ class ShuffleChannelGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( input_dims.size(), 4, - platform::errors::InvalidArgument("The layout of input is NCHW.")); + phi::errors::InvalidArgument("The layout of input is NCHW.")); ctx->SetOutputDim(framework::GradVarName("X"), input_dims); } diff --git a/paddle/fluid/operators/similarity_focus_op.cc b/paddle/fluid/operators/similarity_focus_op.cc index 4508459f25514..4889dd9dfbf6b 100644 --- a/paddle/fluid/operators/similarity_focus_op.cc +++ b/paddle/fluid/operators/similarity_focus_op.cc @@ -66,7 +66,7 @@ class SimilarityFocusOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension size of Input(X) be 4, but received %d.", x_dims.size())); ctx->SetOutputDim("Out", x_dims); diff --git a/paddle/fluid/operators/similarity_focus_op.h b/paddle/fluid/operators/similarity_focus_op.h index 32349e9570369..eea1d1953a4b9 100644 --- a/paddle/fluid/operators/similarity_focus_op.h +++ b/paddle/fluid/operators/similarity_focus_op.h @@ -46,15 +46,15 @@ class SimilarityFocusKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( indexes.size(), 0, - platform::errors::InvalidArgument("The size of Attr(indexes) 
must be " - "greater than 0, but received %d.", - indexes.size())); + phi::errors::InvalidArgument("The size of Attr(indexes) must be " + "greater than 0, but received %d.", + indexes.size())); for (size_t i = 0; i < indexes.size(); i++) { PADDLE_ENFORCE_GT( dim[axis], indexes[i], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Each value of Attr(indexes) must be less than X.dim[axis], " "but indexes[%d] received %d.", i, @@ -84,13 +84,13 @@ class SimilarityFocusKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( axis, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The value of Attr(axis) must be 1 or 2 or 3, but received %d.", axis)); PADDLE_ENFORCE_LT( axis, 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The value of Attr(axis) must be 1 or 2 or 3, but received %d.", axis)); memset(out_data, 0, sizeof(T) * batch_size * dim[1] * dim[2] * dim[3]); diff --git a/paddle/fluid/operators/slice_op.cc b/paddle/fluid/operators/slice_op.cc index 16b895ce557a7..881e3b59f0db7 100644 --- a/paddle/fluid/operators/slice_op.cc +++ b/paddle/fluid/operators/slice_op.cc @@ -40,7 +40,7 @@ class SliceOp : public framework::OperatorWithKernel { if (x_var_type == framework::proto::VarType::LOD_TENSOR_ARRAY) { PADDLE_ENFORCE_EQ(axes.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of axes must be 1 when the Input of " "SliceOp is LoDTensorArray, " "but received %d.", @@ -63,7 +63,7 @@ class SliceOp : public framework::OperatorWithKernel { auto in_dims = ctx->GetInputDim("Input"); PADDLE_ENFORCE_LT(in_dims.size(), 7, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of input should be less than 7.")); framework::DDim out_dims(in_dims); @@ -83,31 +83,31 @@ class SliceOp : public framework::OperatorWithKernel { if (ctx->HasInputs("StartsTensorList")) { starts_size = ctx->Inputs("StartsTensorList").size(); - PADDLE_ENFORCE_GT(starts_size, - 0, - 
platform::errors::InvalidArgument( - "StartsTensorList size can't be zero")); + PADDLE_ENFORCE_GT( + starts_size, + 0, + phi::errors::InvalidArgument("StartsTensorList size can't be zero")); } if (ctx->HasInputs("EndsTensorList")) { ends_size = ctx->Inputs("EndsTensorList").size(); - PADDLE_ENFORCE_GT(ends_size, - 0, - platform::errors::InvalidArgument( - "EndsTensorList size can't be zero")); + PADDLE_ENFORCE_GT( + ends_size, + 0, + phi::errors::InvalidArgument("EndsTensorList size can't be zero")); } if (!ctx->HasInput("StartsTensor")) { PADDLE_ENFORCE_EQ( starts_size, axes.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of starts must be equal to the size of axes.")); } if (!ctx->HasInput("EndsTensor")) { PADDLE_ENFORCE_EQ( ends_size, axes.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of ends must be equal to the size of axes.")); } for (auto &axis : axes) { @@ -143,7 +143,7 @@ class SliceOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( in_tensor.IsInitialized(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The tensor Input (Input) of Slice op is not initialized.")); // NOTE: cuda pinned tensor need to copy its data to target place if (platform::is_cuda_pinned_place(in_tensor.place())) { @@ -304,14 +304,13 @@ class SliceOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE_EQ(ctx->HasInput("Input"), + true, + phi::errors::InvalidArgument("Input should not be null")); PADDLE_ENFORCE_EQ( - ctx->HasInput("Input"), + ctx->HasInput(framework::GradVarName("Out")), true, - platform::errors::InvalidArgument("Input should not be null")); - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), - true, - platform::errors::InvalidArgument( - "Input(Out@GRAD) should not be null")); + 
phi::errors::InvalidArgument("Input(Out@GRAD) should not be null")); auto x_var_type = ctx->GetInputsVarType("Input")[0]; if (x_var_type == framework::proto::VarType::LOD_TENSOR_ARRAY) { // If the var type of input is LOD_TENSOR_ARRAY, diff --git a/paddle/fluid/operators/soft_relu_op.cu b/paddle/fluid/operators/soft_relu_op.cu index 3963b372c9c8e..e4273c73530f6 100644 --- a/paddle/fluid/operators/soft_relu_op.cu +++ b/paddle/fluid/operators/soft_relu_op.cu @@ -39,7 +39,7 @@ PD_REGISTER_STRUCT_KERNEL(soft_relu, ops::SoftReluCudaKernel, float, double, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} PD_REGISTER_STRUCT_KERNEL(soft_relu_grad, GPU, @@ -47,5 +47,5 @@ PD_REGISTER_STRUCT_KERNEL(soft_relu_grad, ops::SoftReluGradCudaKernel, float, double, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} diff --git a/paddle/fluid/operators/sparse_attention_op.cc b/paddle/fluid/operators/sparse_attention_op.cc index 26dfc0fbbc64d..6d6a567ab1b61 100644 --- a/paddle/fluid/operators/sparse_attention_op.cc +++ b/paddle/fluid/operators/sparse_attention_op.cc @@ -99,15 +99,15 @@ class SparseAttentionOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(dims_q.size(), static_cast(4), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Dimension in query' shapes should be 4.")); - PADDLE_ENFORCE_EQ(dims_k.size(), - static_cast(4), - platform::errors::InvalidArgument( - "Dimension in key' shapes should be 4.")); + PADDLE_ENFORCE_EQ( + dims_k.size(), + static_cast(4), + phi::errors::InvalidArgument("Dimension in key' shapes should be 4.")); PADDLE_ENFORCE_EQ(dims_v.size(), static_cast(4), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Dimension in value' shapes should be 4.")); auto batch_size = dims_q[0]; diff --git a/paddle/fluid/operators/sparse_attention_op.cu b/paddle/fluid/operators/sparse_attention_op.cu index 117de1c1a55df..ec41a829e7f72 100644 --- a/paddle/fluid/operators/sparse_attention_op.cu +++ 
b/paddle/fluid/operators/sparse_attention_op.cu @@ -314,7 +314,7 @@ void SparseSoftmaxForward(const phi::GPUContext& ctx, columns_data, num_rows); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "The head_dim of query in sparse_attention op should less or equal " "512")); } @@ -412,7 +412,7 @@ void SparseSoftmaxBackward(const phi::GPUContext& ctx, columns_data, num_rows); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "The head_dim of query in sparse_attention op should less or equal " "512")); } @@ -425,7 +425,7 @@ inline cudaDataType_t GetGpuType(const VarType::Type data_type) { } else if (data_type == VarType::FP64) { return CUDA_R_64F; } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Not support tensor type in sparse_attention OP: %s", framework::DataTypeToString(data_type))); } diff --git a/paddle/fluid/operators/split_lod_tensor_op.cc b/paddle/fluid/operators/split_lod_tensor_op.cc index 6b79d5c35b783..16e444c6f54ad 100644 --- a/paddle/fluid/operators/split_lod_tensor_op.cc +++ b/paddle/fluid/operators/split_lod_tensor_op.cc @@ -73,7 +73,7 @@ class SplitLoDTensorOp : public framework::OperatorBase { framework::TensorCopy( mask, platform::CPUPlace(), dev_ctx, cpu_mask.get()); #else - PADDLE_THROW(paddle::platform::errors::Fatal( + PADDLE_THROW(phi::errors::Fatal( "Not support GPU, Please compile WITH_GPU option")); #endif } @@ -177,7 +177,7 @@ class SplitLoDTensorInferShape : public framework::InferShapeBase { PADDLE_ENFORCE_EQ( mask_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "If you are using IfElse OP:" "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " "ie.true_block():\n out_1 = ie.input(x)\n\n" @@ -188,7 +188,7 @@ class SplitLoDTensorInferShape : public framework::InferShapeBase { "].\n")); PADDLE_ENFORCE_EQ(mask_dim[1], 1, - 
platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "If you are using IfElse OP:" "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " "ie.true_block():\n out_1 = ie.input(x)\n\n" diff --git a/paddle/fluid/operators/split_op.cc b/paddle/fluid/operators/split_op.cc index ddda1131f5cc7..1a4eace1f3398 100644 --- a/paddle/fluid/operators/split_op.cc +++ b/paddle/fluid/operators/split_op.cc @@ -35,11 +35,11 @@ class SplitOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of SplitOp should not be null.")); PADDLE_ENFORCE_GE(ctx->Outputs("Out").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Outputs(Out) of SplitOp should not be empty.")); int axis = static_cast(ctx->Attrs().Get("axis")); int num = static_cast(ctx->Attrs().Get("num")); @@ -218,7 +218,7 @@ class SplitCompositeGradOpMaker : public prim::CompositeGradOpMakerBase { std::vector out_grad = this->GetMultiOutputGrad("Out"); if (tensor_axis.is_initialized() || tensor_sections.is_initialized()) { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "We don't support dynamic index or sections from tensor for split " "composite grad for now. ")); } else { diff --git a/paddle/fluid/operators/split_op.h b/paddle/fluid/operators/split_op.h index aaee366a4636a..e7ba7d0706fd2 100644 --- a/paddle/fluid/operators/split_op.h +++ b/paddle/fluid/operators/split_op.h @@ -20,7 +20,7 @@ limitations under the License. 
*/ #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/utils.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/split_kernel.h" namespace paddle { namespace operators { @@ -39,7 +39,7 @@ static inline std::vector UpdateOutsDims( PADDLE_ENFORCE_EQ( input_axis_dim % num, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input's size along the split dimension " "must be evenly divisible by Attr(num_or_sections). " "But received Attr(num_or_sections) " @@ -75,7 +75,7 @@ static inline std::vector UpdateOutsDims( PADDLE_ENFORCE_LE( num_of_unk, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only one dimension value of Attr(num_or_sections) " "in SplitOp can be -1. " "But received Attr(num_or_sections) = [%s].", @@ -89,7 +89,7 @@ static inline std::vector UpdateOutsDims( PADDLE_ENFORCE_LT( sum_of_section, input_axis_dim, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Sum of Attr(num_or_sections) other than unknown section " "must be less than the input's " "size " @@ -105,7 +105,7 @@ static inline std::vector UpdateOutsDims( PADDLE_ENFORCE_EQ( sum_of_section, input_axis_dim, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Sum of Attr(num_or_sections) must be equal to the input's " "size " "along the split dimension. 
But received Attr(num_or_sections)" diff --git a/paddle/fluid/operators/spp_op.cc b/paddle/fluid/operators/spp_op.cc index 98072746e8eee..ad2ded506cd85 100644 --- a/paddle/fluid/operators/spp_op.cc +++ b/paddle/fluid/operators/spp_op.cc @@ -63,19 +63,19 @@ class SppOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - platform::errors::InvalidArgument( - "Input(X) of SppOp should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("X"), + true, + phi::errors::InvalidArgument("Input(X) of SppOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of SppOp should not be null.")); auto in_x_dims = ctx->GetInputDim("X"); int pyramid_height = ctx->Attrs().Get("pyramid_height"); PADDLE_ENFORCE_EQ(in_x_dims.size(), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Spping intput must be of 4-dimensional.")); int outlen = ((std::pow(4, pyramid_height) - 1) / (4 - 1)) * in_x_dims[1]; // NOLINT @@ -91,11 +91,11 @@ class SppOpGrad : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::InvalidArgument("Input(X) must not be null.")); + phi::errors::InvalidArgument("Input(X) must not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput(framework::GradVarName("X")), true, - platform::errors::InvalidArgument("Input(X@GRAD) should not be null.")); + phi::errors::InvalidArgument("Input(X@GRAD) should not be null.")); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); } }; diff --git a/paddle/fluid/operators/squeeze_op.h b/paddle/fluid/operators/squeeze_op.h index 10ff809d60888..21aed7b00882c 100644 --- a/paddle/fluid/operators/squeeze_op.h +++ b/paddle/fluid/operators/squeeze_op.h @@ -45,7 +45,7 @@ framework::DDim 
GetOutputShape(const std::vector squeeze_dims, PADDLE_ENFORCE_GE( current, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Each axis in Attr(axes) should be in the range of [%d, %d]" "But current axis is:%d, input tensor's shape = [%s].", -in_dims.size(), @@ -55,7 +55,7 @@ framework::DDim GetOutputShape(const std::vector squeeze_dims, PADDLE_ENFORCE_LT( current, in_dims.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Each axis in Attr(axes) should be in the range of [%d, %d]" "But current axis is:%d, input tensor's shape = [%s].", -in_dims.size(), diff --git a/paddle/fluid/operators/stft_op.cc b/paddle/fluid/operators/stft_op.cc index 34f6ee854dd7b..b1165bf2bf295 100644 --- a/paddle/fluid/operators/stft_op.cc +++ b/paddle/fluid/operators/stft_op.cc @@ -36,20 +36,20 @@ class StftOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_rank, 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of StftOp should be a tensor with shape [N, T], " "but got rank %s.", x_rank)); PADDLE_ENFORCE_GT( hop_length, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Attribute(hop_length) should be greater than 0, but got %s.", hop_length)); PADDLE_ENFORCE_EQ( window_size, n_fft, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Window) of StftOp should be equal with n_fft %s, " "but got %s.", n_fft, @@ -60,7 +60,7 @@ class StftOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LE(n_fft, seq_length, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Attribute(frame_length) should be less equal than " "sequence length, but got (%s) > (%s).", n_fft, diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc index d8b7e35d6d3a1..6cbb99ff2032f 100644 --- a/paddle/fluid/operators/sum_op.cc +++ b/paddle/fluid/operators/sum_op.cc @@ -38,20 +38,20 @@ class SumOp : public framework::OperatorWithKernel { 
PADDLE_ENFORCE_GT( x_vars.size(), 0, - platform::errors::InvalidArgument("Input[X] should not be empty")); + phi::errors::InvalidArgument("Input[X] should not be empty")); PADDLE_ENFORCE_NOT_NULL( x_vars[0], - platform::errors::NotFound("Input var[%s] should not be nullptr", - x_vars_name[0])); + phi::errors::NotFound("Input var[%s] should not be nullptr", + x_vars_name[0])); if (x_vars[0]->IsType()) { int dtype = -1; for (size_t idx = 0; idx < x_vars.size(); ++idx) { PADDLE_ENFORCE_NOT_NULL( x_vars[idx], - platform::errors::NotFound("Input var[%s] should not be nullptr", - x_vars_name[idx])); + phi::errors::NotFound("Input var[%s] should not be nullptr", + x_vars_name[idx])); auto tensor = framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_vars[idx]); if (!tensor->IsInitialized()) { @@ -62,13 +62,13 @@ class SumOp : public framework::OperatorWithKernel { } else { PADDLE_ENFORCE_EQ(dtype, framework::TransToProtoVarType(tensor->dtype()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The inputs type of sum op must be same")); } } PADDLE_ENFORCE_NE(dtype, -1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Sum operator should have at least one tensor")); auto data_type = static_cast(dtype); @@ -108,13 +108,13 @@ class SumOp : public framework::OperatorWithKernel { } } } - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Expected each tensor in Input(x) in sum op has be initialized, but " "some tensor in Input(x) is not be initialized, please check your " "code.", framework::ToTypeName(x_vars[0]->Type()))); } - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Expected type of Input(X) must be Tensor, SelectedRows or " "LodTensorArray. 
But got " "unsupport type: %s.", @@ -164,7 +164,7 @@ class SumOpVarTypeInference : public framework::VarTypeInference { << " type is " << ctx->GetInputType("X", static_cast(ind)) << "\n"; } - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Not all inputs are tensor array:\n%s", os.str())); } var_type = framework::proto::VarType::LOD_TENSOR_ARRAY; diff --git a/paddle/fluid/operators/svd_helper.h b/paddle/fluid/operators/svd_helper.h index 273e2c7b65100..c480bb9bb12e9 100644 --- a/paddle/fluid/operators/svd_helper.h +++ b/paddle/fluid/operators/svd_helper.h @@ -24,12 +24,12 @@ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/diag_op.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/for_range.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/complex_functors.h" +#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/funcs/lapack/lapack_function.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -67,9 +67,9 @@ struct RealMulComplexFunctor { PADDLE_ENFORCE_LT( y.imag, 1e-6, - platform::errors::InvalidArgument("The image part of y must to be 0" - "but got [%d]", - y.imag)); + phi::errors::InvalidArgument("The image part of y must to be 0" + "but got [%d]", + y.imag)); return platform::complex>(x.real * y.real, x.imag * y.real); } @@ -79,9 +79,9 @@ static std::vector GetBroadcastShape(InTensors ins) { PADDLE_ENFORCE_EQ( ins.size(), 2, - platform::errors::InvalidArgument("GetBroadcastShape Receive 2 tensors" - "but got [%d]", - ins.size())); + phi::errors::InvalidArgument("GetBroadcastShape Receive 2 tensors" + "but got [%d]", + ins.size())); auto x_dim = ins[0]->dims(); auto y_dim = ins[1]->dims(); std::vector 
broadcast_shape = @@ -104,7 +104,7 @@ static std::vector GetBroadcastShape(InTensors ins) { broadcast_shape[final_rank - i] = x_dim[rank_x - i]; continue; } - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Wrong Input Shape in broadcast operator: " "Input(X)'s shape must follow the broadcast rule with Input(Y)'s " "shape, but received [%s] (X) vs [%s] (Y).", @@ -125,14 +125,14 @@ static inline framework::DDim ComputeAndCheckShapeForConcatOp( PADDLE_ENFORCE_EQ( inputs_dims[i].size(), out_dims.size(), - platform::errors::InvalidArgument("The shape of input[0] and input[%d] " - "is expected to be equal." - "But received input[0]'s shape = " - "[%s], input[%d]'s shape = [%s].", - i, - inputs_dims[0], - i, - inputs_dims[i])); + phi::errors::InvalidArgument("The shape of input[0] and input[%d] " + "is expected to be equal." + "But received input[0]'s shape = " + "[%s], input[%d]'s shape = [%s].", + i, + inputs_dims[0], + i, + inputs_dims[i])); for (size_t j = 0; j < in_zero_dims_size; j++) { if (j == axis) { if (is_runtime) { @@ -151,7 +151,7 @@ static inline framework::DDim ComputeAndCheckShapeForConcatOp( // check all shape in run time PADDLE_ENFORCE_EQ(inputs_dims[0][j], inputs_dims[i][j], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The %d-th dimension of input[0] and input[%d] " "is expected to be equal." 
"But received input[0]'s shape = " @@ -175,7 +175,7 @@ static inline int64_t ComputeAxisForConcatOp(int64_t axis, int64_t rank) { PADDLE_ENFORCE_EQ( axis >= -rank && axis < rank, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The axis is expected to be in range of [%d, %d), but got %d", -rank, rank, @@ -205,7 +205,7 @@ static std::vector get_broadcast_batch_portion( PADDLE_ENFORCE_EQ( (x_size == y_size || x_size == 1 || y_size == 1), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The size of tensor x (%d) must match the size of tensor y " "(%d) at non-singleton dimension %d.", x_size, @@ -337,7 +337,7 @@ struct DeviceIndependenceTensorOperations { DITO_TRANSPOSE_RANK_CASE(5); DITO_TRANSPOSE_RANK_CASE(6); default: { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Invalid Rank number, " "currently only support rank between 2~6")); } @@ -350,11 +350,11 @@ struct DeviceIndependenceTensorOperations { int padding_value = 0) { PADDLE_ENFORCE_EQ(padding_value, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Current diag only support padding_value = 0")); PADDLE_ENFORCE_EQ(offset, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Current diag only support offset = 0," "you can use DiagOp instead(not recommend)")); @@ -362,7 +362,7 @@ struct DeviceIndependenceTensorOperations { int x_rank = x.dims().size(); std::vector out_shape; if (x_rank == 2) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Current diag only support vector" "-> diagonalized matrix, not support matrix -> vector," " Use DiagOp instead.")); @@ -371,7 +371,7 @@ struct DeviceIndependenceTensorOperations { out_shape.push_back(x.dims()[0]); } else { PADDLE_THROW( - platform::errors::InvalidArgument("Rank must less or equal than 2")); + phi::errors::InvalidArgument("Rank must less or equal than 2")); } ret 
= Fill({out_shape[0], out_shape[0]}, 0.0); T* output = ret.mutable_data(context.GetPlace()); @@ -540,11 +540,11 @@ struct DeviceIndependenceTensorOperations { PADDLE_ENFORCE_EQ( axes.size(), starts.size(), - platform::errors::InvalidArgument("Slice Operator Argument Invalided")); + phi::errors::InvalidArgument("Slice Operator Argument Invalided")); PADDLE_ENFORCE_EQ( ends.size(), starts.size(), - platform::errors::InvalidArgument("Slice Operator Argument Invalided")); + phi::errors::InvalidArgument("Slice Operator Argument Invalided")); for (unsigned int i = 0; i < axes.size(); ++i) { int axis = axes[i]; if (axis < 0) axis = rank + axis; @@ -553,7 +553,7 @@ struct DeviceIndependenceTensorOperations { int ed = ends[i]; PADDLE_ENFORCE_GT(ed, st, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "C++ Slice Operation Not Support End < Start")); out_shape[axis] = ed - st; } @@ -576,7 +576,7 @@ struct DeviceIndependenceTensorOperations { DITO_SLICE_RANK_CASE(5); DITO_SLICE_RANK_CASE(6); default: { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Invalid Rank number, " "currently only support rank between 2~6")); } @@ -584,22 +584,6 @@ struct DeviceIndependenceTensorOperations { return ret; } - phi::DenseTensor TrilTriu(const phi::DenseTensor& x, - int diagonal, - bool lower) { - framework::AttributeMap attrs; - attrs["diagonal"] = diagonal; - attrs["lower"] = lower; - NameInTensorMap inputs({{"X", {&x}}}); - int x_rank = x.dims().size(); - PADDLE_ENFORCE_GE( - x_rank, - 2, - platform::errors::InvalidArgument("Rank must be at least 2.")); - std::vector out_shape = common::vectorize(x.dims()); - return CreateOpRunAndReturnTensor("tril_triu", inputs, attrs, out_shape); - } - phi::DenseTensor TriangularSolve(const phi::DenseTensor& x, const phi::DenseTensor& y, bool upper, @@ -714,12 +698,12 @@ struct DeviceIndependenceTensorOperations { size_t rank = in->dims().size(); PADDLE_ENFORCE_EQ(start.size(), rank, - 
platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "EigenSliceWrapper function start " "argument must have the same length as input rank.")); PADDLE_ENFORCE_EQ(end.size(), rank, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "EigenSliceWrapper function end " "argument must have the same length as input rank.")); auto eigen_place_ptr = @@ -732,7 +716,7 @@ struct DeviceIndependenceTensorOperations { offsets_32bit[i] = start[i]; extents_32bit[i] = end[i]; } - EigenSlice, T, D>::Eval( + phi::funcs::EigenSlice, T, D>::Eval( eigen_place, framework::To32BitIndex(out_t), framework::To32BitIndex(in_t), diff --git a/paddle/fluid/operators/tdm_child_op.cc b/paddle/fluid/operators/tdm_child_op.cc index 7b9932ffb4a62..e14dc0e316219 100644 --- a/paddle/fluid/operators/tdm_child_op.cc +++ b/paddle/fluid/operators/tdm_child_op.cc @@ -60,18 +60,18 @@ class TDMChildOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(X) of TdmChild should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasInput("TreeInfo"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(TreeInfo) of TdmChild should not be null.")); int child_nums = ctx->Attrs().Get("child_nums"); PADDLE_ENFORCE_GT( child_nums, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ValueError: The value of the 'child_nums' must greater than 0. " "But received child_nums value = %d, ", child_nums)); @@ -82,7 +82,7 @@ class TDMChildOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( info_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The dimensions of the 'tree info' must be 2. 
" "But received tree info's dimensions = %d, " "tree info's shape = [%s].", diff --git a/paddle/fluid/operators/tdm_child_op.h b/paddle/fluid/operators/tdm_child_op.h index 2d849e1849348..3380062743047 100644 --- a/paddle/fluid/operators/tdm_child_op.h +++ b/paddle/fluid/operators/tdm_child_op.h @@ -56,7 +56,7 @@ void TDMChildInner(const framework::ExecutionContext &context, PADDLE_ENFORCE_LT( input_data[input_ids], node_nums, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "input id of OP(paddle.incubate.layers.tdm_child) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -65,7 +65,7 @@ void TDMChildInner(const framework::ExecutionContext &context, PADDLE_ENFORCE_LE( 0, input_data[input_ids], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "input id of OP(paddle.incubate.layers.tdm_child) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -119,7 +119,7 @@ class TDMChildKernel : public framework::OpKernel { input_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(input_type_match, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) holds the wrong type, it holds %s, but " "desires to be %s or %s", paddle::framework::DataTypeToString(input_type), @@ -136,7 +136,7 @@ class TDMChildKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( info_type_match, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(TreeInfo) holds the wrong type, it holds %s, but " "desires to be %s or %s", paddle::framework::DataTypeToString(info_type), @@ -156,7 +156,7 @@ class TDMChildKernel : public framework::OpKernel { output_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(out_type_match, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Child) & Output(LeafMask) holds the wrong " "type, it holds %s, but " "desires to be %s or %s", diff --git a/paddle/fluid/operators/tdm_sampler_op.cc 
b/paddle/fluid/operators/tdm_sampler_op.cc index d516af7718365..f7877b8268a04 100644 --- a/paddle/fluid/operators/tdm_sampler_op.cc +++ b/paddle/fluid/operators/tdm_sampler_op.cc @@ -81,15 +81,15 @@ class TDMSamplerOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(Input) of TdmSampler should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasInput("Travel"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(Travel) of TdmSampler should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasInput("Layer"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(Layer) of TdmSampler should not be null.")); auto neg_samples_num_vec = ctx->Attrs().Get>("neg_samples_num_list"); diff --git a/paddle/fluid/operators/tdm_sampler_op.h b/paddle/fluid/operators/tdm_sampler_op.h index 52f86d633307b..7dcc72b66a1a6 100644 --- a/paddle/fluid/operators/tdm_sampler_op.h +++ b/paddle/fluid/operators/tdm_sampler_op.h @@ -93,7 +93,7 @@ void TDMSamplerInner(const framework::ExecutionContext &context, PADDLE_ENFORCE_LT( -1, input_id, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.tdm_sampler) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -102,7 +102,7 @@ void TDMSamplerInner(const framework::ExecutionContext &context, PADDLE_ENFORCE_LT( input_id, travel_dim[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.tdm_sampler) " "expected >= 0 and < %ld, but got %ld. 
Please check input " "value.", @@ -126,7 +126,7 @@ void TDMSamplerInner(const framework::ExecutionContext &context, PADDLE_ENFORCE_LE( sample_num, node_nums - 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Neg sample nums id of OP(fluid.layers.tdm_sampler) at layer %ld " "expected <= %ld - 1 (positive included), but got %ld. Please " "check neg_samples_num_list.", @@ -163,7 +163,7 @@ void TDMSamplerInner(const framework::ExecutionContext &context, PADDLE_ENFORCE_LE( positive_node_id, node_id_max, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Positive node id of OP(fluid.layers.tdm_sampler) at layer %ld " "expected >= %ld and <= %ld, but got %ld. Please check input " "value.", @@ -174,7 +174,7 @@ void TDMSamplerInner(const framework::ExecutionContext &context, PADDLE_ENFORCE_LE( node_id_min, positive_node_id, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Positive node id of OP(fluid.layers.tdm_sampler) at layer %ld " "expected >= %ld and <= %ld, but got %ld. Please check input " "value.", @@ -224,7 +224,7 @@ void TDMSamplerInner(const framework::ExecutionContext &context, PADDLE_ENFORCE_LE( layer_data[layer_offset_lod[layer_idx] + sample_res], node_id_max, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Negative node id of OP(fluid.layers.tdm_sampler) at layer %ld" "expected >= %ld and <= %ld, but got %ld. 
Please check input " "tdm tree structure and tdm travel info.", @@ -270,7 +270,7 @@ class TDMSamplerKernel : public framework::OpKernel { input_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(input_type_match, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) holds the wrong type, it holds %s, but " "desires to be %s or %s", paddle::framework::DataTypeToString(input_type), @@ -286,7 +286,7 @@ class TDMSamplerKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( travel_type_match, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Travel) holds the wrong type, it holds %s, but " "desires to be %s or %s", paddle::framework::DataTypeToString(travel_type), @@ -301,7 +301,7 @@ class TDMSamplerKernel : public framework::OpKernel { layer_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(layer_type_match, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Layer) holds the wrong type, it holds %s, but " "desires to be %s or %s", paddle::framework::DataTypeToString(layer_type), @@ -312,7 +312,7 @@ class TDMSamplerKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( travel_type, layer_type, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Travel) must holds the same type with " "Input(Layer), but Travel holds %s, and Layer holds %s", paddle::framework::DataTypeToString(travel_type), diff --git a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc index 332008894d5b9..29344b1ace0b0 100644 --- a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc +++ b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc @@ -39,32 +39,32 @@ class TeacherStudentSigmoidLossOp : public framework::OperatorWithKernel { auto label_dims = ctx->GetInputDim("Label"); PADDLE_ENFORCE_EQ(x_dims.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( 
"Input(X)'s rank should be 2. But received: " "Input(X)'s rank is [%d]", x_dims.size())); - PADDLE_ENFORCE_EQ(label_dims.size(), - 2UL, - platform::errors::InvalidArgument( - "Input(Label)'s rank should be 2. But " - "received Input(Label)'s rank is [%d]", - label_dims.size())); + PADDLE_ENFORCE_EQ( + label_dims.size(), + 2UL, + phi::errors::InvalidArgument("Input(Label)'s rank should be 2. But " + "received Input(Label)'s rank is [%d]", + label_dims.size())); if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ( x_dims[0], label_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 1st dimension of Input(X) and Input(Label) should " "be equal. The difference is [%d]: [%d]", x_dims[0], label_dims[0])); - PADDLE_ENFORCE_EQ(label_dims[1], - 1UL, - platform::errors::InvalidArgument( - "The 2nd dimension of " - "Input(Label) should be 1. But received " - "Input(Label)'s 2nd dim is [%d]", - label_dims[1])); + PADDLE_ENFORCE_EQ( + label_dims[1], + 1UL, + phi::errors::InvalidArgument("The 2nd dimension of " + "Input(Label) should be 1. But received " + "Input(Label)'s 2nd dim is [%d]", + label_dims[1])); } ctx->SetOutputDim("Y", {x_dims[0], 1}); ctx->ShareLoD("X", /*->*/ "Y"); @@ -128,18 +128,18 @@ class TeacherStudentSigmoidLossGradientOp PADDLE_ENFORCE_EQ( x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X)'s rank should be 2. But received Input(X)'s rank is [%d]", x_dims.size())); PADDLE_ENFORCE_EQ(dy_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Y@Grad)'s rank should be 2. But received " "Input(Y@Grad)'s rank is [%d]", dy_dims.size())); PADDLE_ENFORCE_EQ(label_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Label)'s rank should be 2. 
But received " "Input(Y@Grad)'s rank is [%d]", label_dims.size())); @@ -147,7 +147,7 @@ class TeacherStudentSigmoidLossGradientOp PADDLE_ENFORCE_EQ( x_dims[0], label_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 1st dimension of Input(X) and Input(Label) should " "be equal. The difference is [%d]: [%d]", x_dims[0], @@ -155,7 +155,7 @@ class TeacherStudentSigmoidLossGradientOp PADDLE_ENFORCE_EQ( x_dims[0], dy_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 1st dimension of Input(X) and Input(Y@Grad) should " "be equal. The difference is [%d]: [%d]", x_dims[0], @@ -163,14 +163,14 @@ class TeacherStudentSigmoidLossGradientOp PADDLE_ENFORCE_EQ( dy_dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 2nd dimension of Input(Y@Grad) should be 1. " "But received Input(Y@Grad)'s 2nd dimension is [%d]", dy_dims[1])); PADDLE_ENFORCE_EQ( label_dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "When Attr(soft_label) == false, the 2nd dimension of " "Input(Label) should be 1. 
But received Input(Label)'s 2nd " "dimension " diff --git a/paddle/fluid/operators/temporal_shift_op.cu b/paddle/fluid/operators/temporal_shift_op.cu index 51b75832d078a..d03f93e0503ae 100644 --- a/paddle/fluid/operators/temporal_shift_op.cu +++ b/paddle/fluid/operators/temporal_shift_op.cu @@ -156,10 +156,10 @@ template class TemporalShiftOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), - true, - platform::errors::InvalidArgument( - "This kernel only runs on GPU device.")); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), + true, + phi::errors::InvalidArgument("This kernel only runs on GPU device.")); auto* input = ctx.Input("X"); auto* output = ctx.Output("Out"); int t = ctx.Attr("seg_num"); @@ -275,11 +275,11 @@ PD_REGISTER_STRUCT_KERNEL(temporal_shift, ops::TemporalShiftOpCUDAKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} PD_REGISTER_STRUCT_KERNEL(temporal_shift_grad, GPU, ALL_LAYOUT, ops::TemporalShiftGradOpCUDAKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/tensor_array_to_tensor_op.cc b/paddle/fluid/operators/tensor_array_to_tensor_op.cc index 69c7446d85d47..f7b5a9a8833d2 100644 --- a/paddle/fluid/operators/tensor_array_to_tensor_op.cc +++ b/paddle/fluid/operators/tensor_array_to_tensor_op.cc @@ -93,9 +93,9 @@ class LoDTensorArray2TensorOp : public framework::OperatorBase { PADDLE_ENFORCE_GT( n, 0, - platform::errors::InvalidArgument("Input tensorarray size should > 0," - "but the received is %d", - n)); + phi::errors::InvalidArgument("Input tensorarray size should > 0," + "but the received is %d", + n)); std::string base_name = Inputs("X")[0]; std::vector names; @@ -229,9 +229,9 @@ class LoDTensorArray2TensorGradOp : public framework::OperatorBase { PADDLE_ENFORCE_GT( n, 0, - platform::errors::InvalidArgument("Input tensorarray size 
should > 0, " - "but the received is: %d. ", - n)); + phi::errors::InvalidArgument("Input tensorarray size should > 0, " + "but the received is: %d. ", + n)); std::string base_name = Inputs("X")[0]; std::vector names; diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index 28dcaf3d43e31..2709d404320bb 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -75,7 +75,7 @@ static void RuntimeStaticShapeCheck(std::vector runtime_input_shape, PADDLE_ENFORCE_EQ( model_input_shape == runtime_input_shape, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input shapes are inconsistent with the model. Expect [%s] in " "model description, but got [%s] in runtime. TRT 5 " "or lower version " @@ -101,7 +101,7 @@ static phi::DataType TRT2FluidDataType(nvinfer1::DataType type) { return phi::DataType::BOOL; #endif default: - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "unknown fluid datatype in Fluid op converter")); return phi::DataType::FLOAT32; } @@ -114,7 +114,7 @@ static void RuntimeDynamicShapeCheck( const std::vector &max_input_shape) { // PADDLE_ENFORCE_EQ( // runtime_input_shape.size(), min_input_shape.size(), - // platform::errors::InvalidArgument( + // phi::errors::InvalidArgument( // "TRT engine runtime input %s dims size(%d) inconsistent " // "with the dynamic shape size(%d)", // x, runtime_input_shape.size(), min_input_shape.size())); @@ -139,7 +139,7 @@ static void RuntimeDynamicShapeCheck( PADDLE_ENFORCE_EQ(is_input_shape_valid( runtime_input_shape, min_input_shape, max_input_shape), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "TRT runtime input shape of %s is invalid. 
Expect " "runtime input shape to be within min/max input shape " "configured in SetTRTDynamicShapeInfo()," @@ -362,12 +362,12 @@ class TensorRTEngineOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( min_input_shape.count(x), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input %s not found in TRT engine min_input_shape.", x)); PADDLE_ENFORCE_EQ( max_input_shape.count(x), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input %s not found in TRT engine max_input_shape.", x)); RuntimeDynamicShapeCheck(x, runtime_input_shape[x], @@ -560,7 +560,7 @@ class TensorRTEngineOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( t.numel(), 1UL, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "This tensor must have one element, but got %ld.", t.numel())); t_shape.push_back(1); } @@ -571,7 +571,7 @@ class TensorRTEngineOp : public framework::OperatorBase { PADDLE_ENFORCE_LT( bind_index, num_bindings, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Wrong TRT engine input binding index. Expected The " "binding index of TRT engine input to be less than " "the number of inputs and outputs. Received binding " @@ -592,7 +592,7 @@ class TensorRTEngineOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( runtime_batch, t_shape[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs of trt subgraphs has different batchsize. " "It's not allowed in static shape mode. 
" "Check whether the model you are running has multiple trt " @@ -693,7 +693,7 @@ class TensorRTEngineOp : public framework::OperatorBase { auto intrt_type = engine->engine()->getBindingDataType(intrt_index); PADDLE_ENFORCE_EQ(indata_type, intrt_type, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The TRT Engine OP's input type [%d] should equal " "to the input data type [%d].", static_cast(intrt_type), @@ -733,7 +733,7 @@ class TensorRTEngineOp : public framework::OperatorBase { buffers[bind_index] = static_cast(t.data()); #endif } else { - PADDLE_THROW(platform::errors::Fatal( + PADDLE_THROW(phi::errors::Fatal( "The TRT Engine OP only support " "float/double/int32_t/int64_t/float16/bool input.")); } @@ -787,14 +787,14 @@ class TensorRTEngineOp : public framework::OperatorBase { auto *fluid_v = scope.FindVar(y); PADDLE_ENFORCE_NOT_NULL( fluid_v, - platform::errors::NotFound( + phi::errors::NotFound( "Output variable %s is not found in TensorRT subgraph.", y)); auto *fluid_t = fluid_v->GetMutable(); fluid_t->Resize(common::make_ddim(ddim)); PADDLE_ENFORCE_LT(bind_index, num_bindings, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The binding index in TRT engine should be less " "than the number of bindings, but got binding " "index = %d, number of bindings = %d.", @@ -813,7 +813,7 @@ class TensorRTEngineOp : public framework::OperatorBase { PADDLE_ENFORCE_LE( runtime_batch, max_batch_size_, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The runtime batch size (%d) is greater than the max batch " "size(%d).\n" "There are two possible causes for this problem: \n" @@ -983,7 +983,7 @@ class TensorRTEngineOp : public framework::OperatorBase { } PADDLE_ENFORCE_NOT_NULL( trt_engine_, - platform::errors::Fatal( + phi::errors::Fatal( "The pointer to tensorrt engine should not be null.")); return trt_engine_; } diff --git a/paddle/fluid/operators/tile_op.cc b/paddle/fluid/operators/tile_op.cc index 
9d961bbd57122..1819b587c7c1c 100644 --- a/paddle/fluid/operators/tile_op.cc +++ b/paddle/fluid/operators/tile_op.cc @@ -181,7 +181,7 @@ class TileCompositeGradOpMaker : public prim::CompositeGradOpMakerBase { auto repeat_times = this->Attr>("repeat_times"); if (tensor_repeat_times.is_initialized() || tensor_repeat_times_attr.is_initialized()) { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "We don't support RepeatTimes from tensor or repeat_times_tensor for " "tile composite grad for now. ")); } else { diff --git a/paddle/fluid/operators/top_k_op.cc b/paddle/fluid/operators/top_k_op.cc index 32ee384f841d6..99b311960e77b 100644 --- a/paddle/fluid/operators/top_k_op.cc +++ b/paddle/fluid/operators/top_k_op.cc @@ -24,17 +24,17 @@ class TopkOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - platform::errors::InvalidArgument( - "Input(X) of TopkOp should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("X"), + true, + phi::errors::InvalidArgument("Input(X) of TopkOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of TopkOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("Indices"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Indices) of TopkOp should not be null.")); auto input_dims = ctx->GetInputDim("X"); @@ -42,18 +42,18 @@ class TopkOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE(k, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Attribute k must be >= 1, but got k is %d.", k)); PADDLE_ENFORCE_GE( input_dims.size(), 1, - platform::errors::InvalidArgument("input must have >= 1d shape")); + phi::errors::InvalidArgument("input must have >= 1d shape")); if 
(ctx->IsRuntime()) { PADDLE_ENFORCE_GE( input_dims[input_dims.size() - 1], k, - platform::errors::InvalidArgument("input must have >= k columns")); + phi::errors::InvalidArgument("input must have >= k columns")); } framework::DDim dims = input_dims; @@ -104,19 +104,19 @@ class TopkOpGrad : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::InvalidArgument("Input(X) should be not null")); + phi::errors::InvalidArgument("Input(X) should be not null")); PADDLE_ENFORCE_EQ( ctx->HasInput("Indices"), true, - platform::errors::InvalidArgument("Input(Indices) should be not null")); - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), - true, - platform::errors::InvalidArgument( - "Grad Input(Out) should be not null")); + phi::errors::InvalidArgument("Input(Indices) should be not null")); + PADDLE_ENFORCE_EQ( + ctx->HasInput(framework::GradVarName("Out")), + true, + phi::errors::InvalidArgument("Grad Input(Out) should be not null")); PADDLE_ENFORCE_EQ( ctx->HasOutput(framework::GradVarName("X")), true, - platform::errors::InvalidArgument("Grad Output(X) should be not null")); + phi::errors::InvalidArgument("Grad Output(X) should be not null")); auto x_dims = ctx->GetInputDim("X"); ctx->SetOutputDim(framework::GradVarName("X"), x_dims); diff --git a/paddle/fluid/operators/top_k_op.cu b/paddle/fluid/operators/top_k_op.cu index 003f670133e45..1bb53891d8151 100644 --- a/paddle/fluid/operators/top_k_op.cu +++ b/paddle/fluid/operators/top_k_op.cu @@ -23,7 +23,7 @@ limitations under the License. 
*/ #endif #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/top_k_op.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/kernels/funcs/top_k_function_cuda.h" // set cub base traits in order to handle float16 @@ -61,10 +61,9 @@ template class TopkOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ( - platform::is_gpu_place(ctx.GetPlace()), - true, - platform::errors::InvalidArgument("It must use CUDAPlace.")); + PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), + true, + phi::errors::InvalidArgument("It must use CUDAPlace.")); auto* input = ctx.Input("X"); auto* output = ctx.Output("Out"); auto* indices = ctx.Output("Indices"); @@ -124,12 +123,12 @@ class TopkOpCUDAKernel : public framework::OpKernel { gridx, input_height)); default: - PADDLE_THROW(platform::errors::Fatal( + PADDLE_THROW(phi::errors::Fatal( "the input k has error when use getMaxLength function to get the " "maxLength.")); }); default: - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "Calculation error occurred in TopK Operator's CUDA Kernel.")); } } @@ -139,10 +138,9 @@ template class TopkOpGradCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - PADDLE_ENFORCE_EQ( - platform::is_gpu_place(context.GetPlace()), - true, - platform::errors::InvalidArgument("It must use CUDAPlace.")); + PADDLE_ENFORCE_EQ(platform::is_gpu_place(context.GetPlace()), + true, + phi::errors::InvalidArgument("It must use CUDAPlace.")); auto* x = context.Input("X"); auto* out_grad = context.Input(framework::GradVarName("Out")); @@ -169,7 +167,7 @@ class TopkOpGradCUDAKernel : public framework::OpKernel { x_grad_data, indices_data, out_grad_data, row, col, k)); default: PADDLE_THROW( - platform::errors::Unavailable("Error occurs when 
Assign Grad.")); + phi::errors::Unavailable("Error occurs when Assign Grad.")); } } }; @@ -184,8 +182,7 @@ REGISTER_OP_CUDA_KERNEL( paddle::operators::TopkOpCUDAKernel, paddle::operators::TopkOpCUDAKernel, paddle::operators::TopkOpCUDAKernel, - paddle::operators::TopkOpCUDAKernel); + paddle::operators::TopkOpCUDAKernel); REGISTER_OP_CUDA_KERNEL( top_k_grad, @@ -194,4 +191,4 @@ REGISTER_OP_CUDA_KERNEL( paddle::operators::TopkOpGradCUDAKernel, paddle::operators::TopkOpGradCUDAKernel, paddle::operators::TopkOpGradCUDAKernel); + phi::dtype::float16>); diff --git a/paddle/fluid/operators/top_k_op_xpu.cc b/paddle/fluid/operators/top_k_op_xpu.cc index fff713236e9a6..48902ed3d8bd5 100644 --- a/paddle/fluid/operators/top_k_op_xpu.cc +++ b/paddle/fluid/operators/top_k_op_xpu.cc @@ -92,5 +92,5 @@ class TopkXPUKernel : public framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_XPU_KERNEL(top_k, ops::TopkXPUKernel, - ops::TopkXPUKernel); + ops::TopkXPUKernel); #endif diff --git a/paddle/fluid/operators/transfer_layout_op.cc b/paddle/fluid/operators/transfer_layout_op.cc index 9df0a1f3e36ed..a23461475397b 100644 --- a/paddle/fluid/operators/transfer_layout_op.cc +++ b/paddle/fluid/operators/transfer_layout_op.cc @@ -52,7 +52,7 @@ class TransferLayoutOp : public framework::OperatorWithKernel { if (in_tensor->layout() != DataLayout::ONEDNN) { PADDLE_ENFORCE_EQ(in_tensor->IsInitialized(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The tensor of Input(X) is not initialized.")); } auto place = diff --git a/paddle/fluid/operators/transfer_layout_op.h b/paddle/fluid/operators/transfer_layout_op.h index 2736171626121..1b4ef2d1b5abb 100644 --- a/paddle/fluid/operators/transfer_layout_op.h +++ b/paddle/fluid/operators/transfer_layout_op.h @@ -74,7 +74,7 @@ class TransferLayoutFunctor { PADDLE_ENFORCE_NE( in_layout, out_layout, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "No layout transform needed 
between two oneDNN OPKernels.")); if (in_layout != DataLayout::ONEDNN && out_layout == DataLayout::ONEDNN) { @@ -136,7 +136,7 @@ class TransferLayoutFunctor { PADDLE_ENFORCE_EQ( common::arity(in.dims()), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input dimension arity only can be 4, the input dimension is %s.", in.dims())); diff --git a/paddle/fluid/operators/unbind_op.h b/paddle/fluid/operators/unbind_op.h index 7a5bf4d34c47c..ea2c6d4ee2bb8 100644 --- a/paddle/fluid/operators/unbind_op.h +++ b/paddle/fluid/operators/unbind_op.h @@ -21,7 +21,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/concat_and_split.h" -#include "paddle/fluid/operators/utils.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/strided_memcpy.h" namespace paddle { diff --git a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc index 6b84fd1612e65..bcff52e1af6d7 100644 --- a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc +++ b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc @@ -38,7 +38,7 @@ inline void UniformRealDistribution(T *data, } template <> -inline void UniformRealDistribution(paddle::platform::bfloat16 *data, +inline void UniformRealDistribution(phi::dtype::bfloat16 *data, const int64_t &size, const float &min, const float &max, @@ -48,7 +48,7 @@ inline void UniformRealDistribution(paddle::platform::bfloat16 *data, auto engine = phi::GetCPURandomEngine(seed); for (int64_t i = 0; i < size; ++i) { - data[i] = static_cast(dist(*engine)); + data[i] = static_cast(dist(*engine)); } } } // namespace @@ -85,7 +85,7 @@ class CPUUniformRandomKernel : public framework::OpKernel { tensor = out_var->GetMutable(); if (!new_shape.empty()) tensor->Resize(common::make_ddim(new_shape)); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + 
PADDLE_THROW(phi::errors::InvalidArgument( "Expected type of Output(out) in uniform_random_op must be Tensor, " "SelectedRows. But got " "unsupport type: %s.", @@ -110,7 +110,7 @@ class CPUUniformRandomKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( size, (diag_num - 1) * (diag_step + 1), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeInvalid: the diagonal's elements is equal (num-1) " "* (step-1) with num %d, step %d," "It should be smaller than %d, but received %d", diff --git a/paddle/fluid/operators/uniform_random_batch_size_like_op.cu b/paddle/fluid/operators/uniform_random_batch_size_like_op.cu index 0cf50142c04a0..0b81c690d573f 100644 --- a/paddle/fluid/operators/uniform_random_batch_size_like_op.cu +++ b/paddle/fluid/operators/uniform_random_batch_size_like_op.cu @@ -45,7 +45,7 @@ class GPUUniformRandomKernel : public framework::OpKernel { tensor = out_var->GetMutable(); if (!new_shape.empty()) tensor->Resize(common::make_ddim(new_shape)); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Expected type of Output(out) in uniform_random_op must be " "phi::DenseTensor, " "SelectedRows. 
But got " diff --git a/paddle/fluid/operators/uniform_random_inplace_op_xpu.cc b/paddle/fluid/operators/uniform_random_inplace_op_xpu.cc index f1afd8ef3e213..6a773c60997ea 100644 --- a/paddle/fluid/operators/uniform_random_inplace_op_xpu.cc +++ b/paddle/fluid/operators/uniform_random_inplace_op_xpu.cc @@ -50,7 +50,7 @@ class XPUUniformRandomInplaceKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( size, (diag_num - 1) * (diag_step + 1), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeInvalid: the diagonal's elements is equal (num-1) " "* (step-1) with num %d, step %d," "It should be smaller than %d, but received %d", diff --git a/paddle/fluid/operators/uniform_random_op.h b/paddle/fluid/operators/uniform_random_op.h index 458794223dc74..2dbab83a2f528 100644 --- a/paddle/fluid/operators/uniform_random_op.h +++ b/paddle/fluid/operators/uniform_random_op.h @@ -60,7 +60,7 @@ inline std::vector GetNewDataFromShapeTensor( } return vec_new_data; } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Expected dtype of ShapeTensor must be int32, int64. But got " "unsupport dtype: %s.", new_data_tensor->dtype())); @@ -76,7 +76,7 @@ inline std::vector GetNewDataFromShapeTensorList( PADDLE_ENFORCE_EQ( tensor->dims(), common::make_ddim({1}), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Shape of dim tensor in uniform_random_op should be [1]" "But received tensor's dim=%s.", tensor->dims())); @@ -100,7 +100,7 @@ inline std::vector GetNewDataFromShapeTensorList( vec_new_shape.push_back(*tensor->data()); } } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Expected dtype of ShapeTensorList of %d-th must be int32, int64. 
" "But got " "unsupport dtype: %s.", diff --git a/paddle/fluid/operators/unique_op.h b/paddle/fluid/operators/unique_op.h index 4d7a9eb5e4937..47bd4674c9a29 100644 --- a/paddle/fluid/operators/unique_op.h +++ b/paddle/fluid/operators/unique_op.h @@ -55,7 +55,7 @@ struct UniqueOpFunctor { PADDLE_ENFORCE_LT( in_->numel(), pow(2, 31), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The num of Input(X) elements should be less then INT_MAX, " "but received num is %d.", in_->numel())); @@ -84,7 +84,7 @@ struct UniqueOpFunctor { index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Index holds the wrong type, it holds %s, " "but desires to be %s or %s", paddle::framework::DataTypeToString(index_type), @@ -406,7 +406,7 @@ class UniqueKernel : public framework::OpKernel { PADDLE_ENFORCE_LE( x->numel(), INT_MAX, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of elements in Input(X) should be less than or " "equal to INT_MAX, but received num is %d. 
Please set `dtype` to " "int64.", diff --git a/paddle/fluid/operators/unique_with_counts_op.cc b/paddle/fluid/operators/unique_with_counts_op.cc index 5272158805d71..2e4af44ac8a1f 100644 --- a/paddle/fluid/operators/unique_with_counts_op.cc +++ b/paddle/fluid/operators/unique_with_counts_op.cc @@ -34,9 +34,9 @@ class UniqueWithCountsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( in_dims.size(), 1, - platform::errors::InvalidArgument("The Input(X) should be 1-D Tensor, " - "But now the dims of Input(X) is %d.", - in_dims.size())); + phi::errors::InvalidArgument("The Input(X) should be 1-D Tensor, " + "But now the dims of Input(X) is %d.", + in_dims.size())); ctx->SetOutputDim("Out", {-1}); ctx->SetOutputDim("Index", in_dims); diff --git a/paddle/fluid/operators/unzip_op.cc b/paddle/fluid/operators/unzip_op.cc index b1b3d42282c40..a72c0c6a878f3 100644 --- a/paddle/fluid/operators/unzip_op.cc +++ b/paddle/fluid/operators/unzip_op.cc @@ -32,8 +32,8 @@ class unzipOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( lod_dims.size(), 1UL, - platform::errors::InvalidArgument( - "Input(X)'s rank should be 1, but got %d", lod_dims.size())); + phi::errors::InvalidArgument("Input(X)'s rank should be 1, but got %d", + lod_dims.size())); auto len = static_cast(ctx->Attrs().Get("len")); ctx->SetOutputDim("Y", {lod_dims[0] - 1, len}); } @@ -70,13 +70,13 @@ class unzipGradientOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 2, - platform::errors::InvalidArgument( - "Expect Input(X)'s rank == 2, but got %d", x_dims.size())); + phi::errors::InvalidArgument("Expect Input(X)'s rank == 2, but got %d", + x_dims.size())); PADDLE_ENFORCE_EQ( lod_dims.size(), 1, - platform::errors::InvalidArgument( - "Expect Input(X)'s rank == 1, but got %d", lod_dims.size())); + phi::errors::InvalidArgument("Expect Input(X)'s rank == 1, but got %d", + lod_dims.size())); ctx->SetOutputDim(framework::GradVarName("X"), x_dims); ctx->ShareLoD("X", 
framework::GradVarName("X")); diff --git a/paddle/fluid/operators/unzip_op.cu b/paddle/fluid/operators/unzip_op.cu index 39d80e8c6ce92..5be9bdea2b752 100644 --- a/paddle/fluid/operators/unzip_op.cu +++ b/paddle/fluid/operators/unzip_op.cu @@ -82,7 +82,7 @@ PD_REGISTER_STRUCT_KERNEL(unzip, ops::unzipCUDAKernel, float, double, - plat::float16, + phi::dtype::float16, bool, int, int64_t, @@ -94,7 +94,7 @@ PD_REGISTER_STRUCT_KERNEL(unzip_grad, ops::unzipGradCUDAKernel, float, double, - plat::float16, + phi::dtype::float16, bool, int, int64_t, diff --git a/paddle/fluid/operators/utils.h b/paddle/fluid/operators/utils.h index cecd2e2931af6..feca2d9c722ac 100644 --- a/paddle/fluid/operators/utils.h +++ b/paddle/fluid/operators/utils.h @@ -29,13 +29,13 @@ inline std::vector GetDataFromTensorList( std::vector vec_new_data; for (size_t i = 0; i < list_tensor.size(); ++i) { auto tensor = list_tensor[i]; - PADDLE_ENFORCE_EQ(tensor->dims(), - common::make_ddim({1}), - platform::errors::InvalidArgument( - "The shape of Tensor in list must be [1]. " - "But received its shape " - "is [%s]", - tensor->dims())); + PADDLE_ENFORCE_EQ( + tensor->dims(), + common::make_ddim({1}), + phi::errors::InvalidArgument("The shape of Tensor in list must be [1]. 
" + "But received its shape " + "is [%s]", + tensor->dims())); if (framework::TransToProtoVarType(tensor->dtype()) == framework::proto::VarType::INT32) { @@ -57,7 +57,7 @@ inline std::vector GetDataFromTensorList( vec_new_data.push_back(static_cast(*tensor->data())); } } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "The dtype of Tensor in list must be int32 or int64, but received: " "%s", tensor->dtype())); diff --git a/paddle/fluid/operators/var_conv_2d_op.cc b/paddle/fluid/operators/var_conv_2d_op.cc index 86e3fc3420ed6..e8d69083e532e 100644 --- a/paddle/fluid/operators/var_conv_2d_op.cc +++ b/paddle/fluid/operators/var_conv_2d_op.cc @@ -66,33 +66,33 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::NotFound("X(Input) of VarConv2dOP is not found.")); + phi::errors::NotFound("X(Input) of VarConv2dOP is not found.")); PADDLE_ENFORCE_EQ( ctx->HasInput("W"), true, - platform::errors::NotFound("W(Input) of VarConv2dOP is not found.")); + phi::errors::NotFound("W(Input) of VarConv2dOP is not found.")); PADDLE_ENFORCE_EQ( ctx->HasInput("ROW"), true, - platform::errors::NotFound("Input(ROW) of VarConv2dOP is not found.")); + phi::errors::NotFound("Input(ROW) of VarConv2dOP is not found.")); PADDLE_ENFORCE_EQ( ctx->HasInput("COLUMN"), true, - platform::errors::NotFound("Input(COLUMN) of VarConv2dOP is not found.")); + phi::errors::NotFound("Input(COLUMN) of VarConv2dOP is not found.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Out"), true, - platform::errors::NotFound("Out(Output) of VarConv2dOP is not found.")); + phi::errors::NotFound("Out(Output) of VarConv2dOP is not found.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Col"), true, - platform::errors::NotFound("Col(Output) of VarConv2dOP is not found.")); + phi::errors::NotFound("Col(Output) of VarConv2dOP is not found.")); auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ( 
x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of X(Input) can't be less than 2, but received rank is %u.", x_dims.size())); @@ -101,7 +101,7 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( w_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input W should be a 2-D tensor, but its actual dimension is %u.", w_dims.size())); int output_channel = ctx->Attrs().Get("OutputChannel"); @@ -111,7 +111,7 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( w_dims[0], output_channel, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input W's dimension[0] should be equal to OutputChannel, the " "dimension[0] is %d, OutputChannel is %d.", w_dims[0], @@ -119,7 +119,7 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( w_dims[1], input_channel * kernel_h * kernel_w, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input W's dimension[1] should be equal to InputChannel * StrideH * " "StrideW, the dimension[1] is %d, expected value is %d.", w_dims[1], @@ -131,17 +131,17 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { const auto& x_lod = x_var->Get().lod(); PADDLE_ENFORCE_EQ(!x_lod.empty(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(X) phi::DenseTensor of VarConv2dOP " "does not contain LoD information.")); - PADDLE_ENFORCE_GE(x_lod.size(), - 1, - platform::errors::InvalidArgument( - "The Input(X)'s lod info is corrupted.")); + PADDLE_ENFORCE_GE( + x_lod.size(), + 1, + phi::errors::InvalidArgument("The Input(X)'s lod info is corrupted.")); PADDLE_ENFORCE_EQ(x_dims[0], static_cast(x_lod[0].back()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(X)'s lod info mismatches the actual " "tensor shape, input lod is %s, tensor shape is 
%s.", x_lod, @@ -153,7 +153,7 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( !row_lod.empty(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(ROW) phi::DenseTensor of VarConv2dOP does not " "contain LoD information.")); @@ -163,7 +163,7 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( !col_lod.empty(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(COLUMN) phi::DenseTensor of VarConv2dOP does not " "contain LoD information.")); } else { @@ -370,17 +370,17 @@ class VarConv2dGradMaker : public framework::SingleGradOpMaker { }; void VarConv2dOpGrad::InferShape(framework::InferShapeContext* ctx) const { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - platform::errors::NotFound( - "Input(X) of SequencePadGradOp is not found.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("W"), - true, - platform::errors::NotFound( - "Input(W) of SequencePadGradOp is not found.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("X"), + true, + phi::errors::NotFound("Input(X) of SequencePadGradOp is not found.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("W"), + true, + phi::errors::NotFound("Input(W) of SequencePadGradOp is not found.")); PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(Out@GRAD) of SequencePadGradOp is not found.")); if (ctx->HasOutput(framework::GradVarName("X"))) { diff --git a/paddle/fluid/operators/xpu_api_wrapper.h b/paddle/fluid/operators/xpu_api_wrapper.h deleted file mode 100644 index c23fb1ae02ab4..0000000000000 --- a/paddle/fluid/operators/xpu_api_wrapper.h +++ /dev/null @@ -1,23 +0,0 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ -#pragma once - -#ifdef PADDLE_WITH_XPU -#include "paddle/phi/kernels/xpu/xpu_api_wrapper.h" - -namespace paddle { -namespace operators { - -using float16 = typename XPUTypeTrait::Type; - -} // namespace operators -} // namespace paddle -#endif diff --git a/paddle/fluid/pir/CMakeLists.txt b/paddle/fluid/pir/CMakeLists.txt index 9e883ef21af9a..7647a7efdf660 100644 --- a/paddle/fluid/pir/CMakeLists.txt +++ b/paddle/fluid/pir/CMakeLists.txt @@ -2,3 +2,4 @@ add_subdirectory(dialect) add_subdirectory(transforms) add_subdirectory(drr) add_subdirectory(utils) +add_subdirectory(serialize_deserialize) diff --git a/paddle/fluid/pir/dialect/CMakeLists.txt b/paddle/fluid/pir/dialect/CMakeLists.txt index 59db81550bb8b..0b2fc8c47b75f 100644 --- a/paddle/fluid/pir/dialect/CMakeLists.txt +++ b/paddle/fluid/pir/dialect/CMakeLists.txt @@ -110,7 +110,7 @@ set(generated_files_pd_op "${pir_bwd_op_source_file}" "${pir_update_op_source_file}") -if(WITH_MKLDNN) +if(WITH_ONEDNN) set(pir_op_onednn_yaml ${parsed_op_dir}/onednn.parsed.yaml) set(pd_onednn_op_yaml_file @@ -250,7 +250,7 @@ set(op_dialect_srcs ${api_source_file} ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/transforms/shape_optimization_pass.cc) -if(WITH_MKLDNN) +if(WITH_ONEDNN) set(op_dialect_srcs ${op_dialect_srcs} ${onednn_op_source_file} ${op_onednn_info_file} ${CMAKE_CURRENT_SOURCE_DIR}/operator/ir/manual_onednn_op.cc) @@ -263,7 +263,14 @@ file(GLOB_RECURSE dist_dialect_srcs # if(WITH_DISTRIBUTE) FIXME in next PR set(op_dialect_srcs ${op_dialect_srcs} ${dist_dialect_srcs}) # endif() -set(op_dialect_deps phi common pir type_info 
string_helper) +set(op_dialect_deps + phi + common + pir + type_info + string_helper + global_utils + amp) if(WITH_ROCM) set(op_dialect_deps ${op_dialect_deps} global_utils) endif() @@ -283,13 +290,13 @@ set(op_dialect_vjp_srcs ${op_vjp_source_file} ${PADDLE_SOURCE_DIR}/paddle/fluid/primitive/base/decomp_trans.cc) -if(WITH_MKLDNN) +if(WITH_ONEDNN) set(op_dialect_vjp_srcs ${op_dialect_vjp_srcs} ${CMAKE_CURRENT_SOURCE_DIR}/operator/ir/op_onednn_dialect.cc) endif() -set(op_dialect_vjp_deps primitive_vjp_experimental op_dialect) +set(op_dialect_vjp_deps primitive_vjp_experimental op_dialect prim_utils) cc_library( op_dialect_vjp diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_api.cc b/paddle/fluid/pir/dialect/distributed/ir/dist_api.cc index 3382fa18b9090..6ba2b16d00df2 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_api.cc +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_api.cc @@ -45,23 +45,20 @@ pir::Value shard_tensor(const pir::Value& x, return shard_tensor_op.out(); } -pir::Value reshard(const pir::Value& x, - const phi::distributed::ProcessMesh& process_mesh, - const std::vector& dims_mapping) { +pir::Value reshard( + const pir::Value& x, + const phi::distributed::ProcessMesh& process_mesh, + const std::vector& dims_mapping, + const flat_hash_map& partial_status) { pir::IrContext* ctx = pir::IrContext::Instance(); - // TODO(ywt01) get partial_status by func parameter - paddle::flat_hash_map partial_status; TensorDistAttribute tensor_dist_attr = TensorDistAttribute::get(ctx, process_mesh, dims_mapping, partial_status); - - auto reshard_op = ApiBuilder::Instance().GetBuilder()->Build( - x, tensor_dist_attr); - return reshard_op.result(0); + return reshard(x, tensor_dist_attr); } pir::Value reshard(const pir::Value& x, const TensorDistAttribute& tensor_dist_attr) { - auto reshard_op = ApiBuilder::Instance().GetBuilder()->Build( + auto reshard_op = ApiBuilder::Instance().GetBuilder()->Build( x, tensor_dist_attr); return reshard_op.result(0); } 
diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_api.h b/paddle/fluid/pir/dialect/distributed/ir/dist_api.h index 18aa1bb32ca64..5706afa63c165 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_api.h +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_api.h @@ -29,9 +29,11 @@ pir::Value shard_tensor(const pir::Value& x, const phi::distributed::ProcessMesh& process_mesh, const std::vector& dims_mapping); -pir::Value reshard(const pir::Value& x, - const phi::distributed::ProcessMesh& process_mesh, - const std::vector& dims_mapping); +pir::Value reshard( + const pir::Value& x, + const phi::distributed::ProcessMesh& process_mesh, + const std::vector& dims_mapping, + const flat_hash_map& partial_status = {}); pir::Value reshard(const pir::Value& x, const TensorDistAttribute& tensor_dist_attr); diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_dialect.cc b/paddle/fluid/pir/dialect/distributed/ir/dist_dialect.cc index 0ea42bf6e093d..5834ba6262f3f 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_dialect.cc +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_dialect.cc @@ -35,7 +35,7 @@ void DistDialect::initialize() { TensorDistAttribute, OperationDistAttribute>(); RegisterTypes(); - RegisterOps(); + RegisterOps(); } void DistDialect::PrintType(pir::Type type, std::ostream &os) const { diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_op.cc b/paddle/fluid/pir/dialect/distributed/ir/dist_op.cc index cc06461e66d55..d419ea7d4d165 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_op.cc +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_op.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/pir/dialect/distributed/ir/dist_op.h" #include "paddle/fluid/pir/dialect/distributed/ir/dist_attribute.h" #include "paddle/fluid/pir/dialect/distributed/ir/dist_type.h" +#include "paddle/fluid/pir/dialect/operator/ir/api_builder.h" #include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h" #include "paddle/fluid/pir/dialect/operator/ir/op_type.h" 
#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" @@ -28,7 +29,7 @@ namespace paddle { namespace dialect { const char* ShardTensorOp::attributes_name[1] = {"op_dist_attr"}; -const char* ReShardOp::attributes_name[1] = {"op_dist_attr"}; +const char* ReshardOp::attributes_name[1] = {"op_dist_attr"}; void ShardTensorOp::VerifySig() { VLOG(4) @@ -159,8 +160,54 @@ void ShardTensorOp::Build(pir::Builder& builder, ::pir::PassStopGradientsDefaultly(argument); } -void ReShardOp::VerifySig() { - VLOG(4) << "Start Verifying inputs, outputs and attributes for: ReShardOp."; +OpInfoTuple ReshardOp::GetOpInfo() { + return OpInfoTuple( + {OpInputInfo()}, {}, {OpOutputInfo()}, OpRunTimeInfo(), "reshard"); +} + +std::vector> ReshardOp::Vjp( + pir::Operation* op, + const std::vector>& inputs_, + const std::vector>& outputs, + const std::vector>& out_grads, + const std::vector>& stop_gradients) { + VLOG(6) << "Start call vjp for reshard op."; + PADDLE_ENFORCE_EQ( + inputs_.size(), + 1, + common::errors::InvalidArgument("reshard op's inputs' size should be 1")); + PADDLE_ENFORCE_EQ(inputs_[0].size(), + 1, + common::errors::InvalidArgument( + "reshard op's inputs[0]'s size should be 1")); + auto dist_type = inputs_[0][0].type().dyn_cast(); + + PADDLE_ENFORCE_NOT_NULL( + dist_type, + common::errors::InvalidArgument( + "Currently, reshard op's inputs type must be dist type.")); + + PADDLE_ENFORCE_EQ(out_grads.size(), + 1, + common::errors::InvalidArgument( + "reshard op's outputs grad size should be 1")); + + PADDLE_ENFORCE_EQ(out_grads[0].size(), + 1, + common::errors::InvalidArgument( + "reshard op's outputs grad[0] size should be 1")); + + auto& builder = *ApiBuilder::Instance().GetBuilder(); + + auto grad_op = + builder.Build(out_grads[0][0], dist_type.tensor_dist_attr()); + + VLOG(6) << "End call vjp for reshard op."; + + return {std::vector{grad_op->result(0)}}; +} +void ReshardOp::VerifySig() { + VLOG(4) << "Start Verifying inputs, outputs and attributes for: ReshardOp."; 
VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); @@ -224,11 +271,11 @@ void ReShardOp::VerifySig() { VLOG(4) << "End Verifying for: ShardTensorOp."; } -void ReShardOp::Build(pir::Builder& builder, +void ReshardOp::Build(pir::Builder& builder, pir::OperationArgument& argument, pir::Value input, TensorDistAttribute tensor_dist_attr) { - VLOG(4) << "Start build ReShardOp"; + VLOG(4) << "Start build ReshardOp"; paddle::dialect::DistDenseTensorType input_tensor_type; if (input.type().isa()) { @@ -270,10 +317,11 @@ void ReShardOp::Build(pir::Builder& builder, tensor_dist_attr, local_shape); argument.AddOutput(out_dist_tensor_type); + ::pir::PassStopGradientsDefaultly(argument); } } // namespace dialect } // namespace paddle IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::ShardTensorOp) -IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::ReShardOp) +IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::ReshardOp) diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_op.h b/paddle/fluid/pir/dialect/distributed/ir/dist_op.h index 7ae81a0040702..638fb430eaf4e 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_op.h +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_op.h @@ -15,6 +15,8 @@ #pragma once #include +#include "paddle/fluid/pir/dialect/operator/interface/op_yaml_info.h" +#include "paddle/fluid/pir/dialect/operator/interface/vjp.h" #include "paddle/pir/include/core/builder.h" #include "paddle/pir/include/core/builtin_type.h" #include "paddle/pir/include/core/op_base.h" @@ -39,7 +41,7 @@ class ShardTensorOp : public pir::Op { void VerifySig(); }; -class ReShardOp : public pir::Op { +class ReshardOp : public pir::Op { public: using Op::Op; static const char* name() { return "dist_op.reshard"; } @@ -49,10 +51,19 @@ class ReShardOp : public pir::Op { pir::OperationArgument& argument, // NOLINT pir::Value input, TensorDistAttribute tensor_dist_attr); + + static OpInfoTuple GetOpInfo(); + static std::vector> Vjp( + pir::Operation* op, + const std::vector>& inputs_, 
+ const std::vector>& outputs, + const std::vector>& out_grads, + const std::vector>& stop_gradients); + void VerifySig(); }; } // namespace dialect } // namespace paddle IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::ShardTensorOp) -IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::ReShardOp) +IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::ReshardOp) diff --git a/paddle/fluid/pir/dialect/op_generator/decomp_interface_gen_op_list.py b/paddle/fluid/pir/dialect/op_generator/decomp_interface_gen_op_list.py index 4d37aaf829861..80b198c8d533a 100644 --- a/paddle/fluid/pir/dialect/op_generator/decomp_interface_gen_op_list.py +++ b/paddle/fluid/pir/dialect/op_generator/decomp_interface_gen_op_list.py @@ -38,6 +38,7 @@ "leaky_relu", "log_softmax", "mean", + "p_norm", "pow", "relu", "relu6", @@ -45,7 +46,6 @@ "silu", "swiglu", "softmax", - "sqrt", "square", "squeeze", "stack", @@ -72,6 +72,7 @@ "leaky_relu", "log_softmax", "mean", + "p_norm", "pow", "relu", "relu6", @@ -79,7 +80,6 @@ "silu", "swiglu", "softmax", - "sqrt", "square", "squeeze", "stack", diff --git a/paddle/fluid/pir/dialect/op_generator/op_gen.py b/paddle/fluid/pir/dialect/op_generator/op_gen.py index 37e620ab24589..ebe06caab438a 100644 --- a/paddle/fluid/pir/dialect/op_generator/op_gen.py +++ b/paddle/fluid/pir/dialect/op_generator/op_gen.py @@ -54,6 +54,7 @@ 'BatchNormOp', 'FetchOp', 'FullIntArrayOp', + 'FusedConv2dAddActOp', 'MatmulOp', 'SoftmaxOp', 'ReshapeOp', @@ -86,6 +87,7 @@ #include "paddle/fluid/pir/dialect/operator/interface/decomp.h" #include "paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_symbolic_shape.h" #include "paddle/fluid/pir/dialect/operator/interface/infermeta.h" +#include "paddle/fluid/pir/dialect/operator/interface/layout_transformation.h" #include "paddle/fluid/pir/dialect/operator/interface/op_yaml_info.h" #include "paddle/fluid/pir/dialect/operator/interface/parse_kernel_key.h" #include "paddle/fluid/pir/dialect/operator/interface/vjp.h" @@ -180,6 +182,7 @@ class 
{TEST_API} {op_name} : public pir::Op<{op_name}{interfaces}{traits}> {{ CC_FILE_TEMPLATE = """// This file is generated by "paddle/fluid/pir/dialect/op_generator/op_gen.py" #include "{h_file}" #include "paddle/fluid/pir/dialect/kernel/ir/kernel_type.h" +#include "paddle/fluid/pir/dialect/operator/interface/layout_transformation.h" #include "paddle/fluid/pir/dialect/operator/ir/ir_meta_tensor.h" #include "paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.h" #include "paddle/fluid/pir/dialect/operator/ir/ir_tensor.h" diff --git a/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py b/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py index 5ad1c5b562740..8ba3d64ad39a3 100644 --- a/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py +++ b/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py @@ -125,6 +125,7 @@ 'add_n_', 'all_reduce', 'all_reduce_', + 'assign_pos', 'batch_fc', 'barrier', 'c_allgather', @@ -140,8 +141,12 @@ 'c_softmax_with_cross_entropy', 'c_split', 'decayed_adagrad', + 'distributed_fused_lamb', + 'distributed_fused_lamb_', 'distributed_push_sparse', 'distributed_lookup_table', + 'dgc_momentum', + 'dgc', 'dpsgd', 'embedding_grad_sparse', 'ftrl', diff --git a/paddle/fluid/pir/dialect/op_generator/vjp_interface_black_list.py b/paddle/fluid/pir/dialect/op_generator/vjp_interface_black_list.py index 4b2bbc3c54999..c0620d4dbdc43 100644 --- a/paddle/fluid/pir/dialect/op_generator/vjp_interface_black_list.py +++ b/paddle/fluid/pir/dialect/op_generator/vjp_interface_black_list.py @@ -22,7 +22,13 @@ # remove this file and support Vjp methods # code gen. +# Operators which only has composite implementation should be added below. +# For example +# * `silu_double_grad` only has composite implementation, so `silu_grad` was added below. +# * `log_double_grad` has both composite and kernel implementation, so `log_grad` should not be added below. 
vjp_interface_black_list = [ 'silu_grad', + 'exp_grad', + 'abs_double_grad', ] diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/binary_infer_sym.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/binary_infer_sym.cc index 42b3567290cda..0c775ef2ee74c 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/binary_infer_sym.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/binary_infer_sym.cc @@ -205,7 +205,9 @@ bool SparseWeightEmbeddingOpInferSymbolicShape( bool ExpandAsOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { PADDLE_THROW(phi::errors::Unimplemented( - op->name() + " 's InferSymbolicShape interface is NOT implemented now.")); + op->name() + + " 's InferSymbolicShape interface is NOT implemented " + "now because of the lack of necessary information.")); return true; } @@ -354,8 +356,16 @@ bool KronOpInferSymbolicShape(pir::Operation *op, bool MaskedSelectOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - PADDLE_THROW(phi::errors::Unimplemented( - op->name() + " 's InferSymbolicShape interface is NOT implemented now.")); + const std::vector &out_dims = [&] { + std::vector out_dims; + symbol::DimExpr out_shape = + shape_analysis->GetNextSymName(); // unknown until runtime + out_dims.push_back(out_shape); + return out_dims; + }(); + // TODO(fty1777): Add constrains between the shapes of x and mask + shape_analysis->SetShapeOrDataForValue( + op->result(0), symbol::TensorShapeOrDataDimExprs{out_dims}); return true; } @@ -462,8 +472,12 @@ bool MatmulOpInferSymbolicShape( bool SearchsortedOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - PADDLE_THROW(phi::errors::Unimplemented( - op->name() + " 's InferSymbolicShape interface is NOT implemented now.")); + // The shape of output is the same as input `values` (op->operand_source(1)) + const 
symbol::ShapeOrDataDimExprs &operand_shape_or_data = + shape_analysis->GetShapeOrDataForValue(op->operand_source(1)); + // TODO(fty1777): Add constrains between the shapes of `sorted_sequence` and + // `values` + shape_analysis->SetShapeOrDataForValue(op->result(0), operand_shape_or_data); return true; } diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_slice_utils.h b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_slice_utils.h index 345c55e1a116b..a1d6f5845802e 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_slice_utils.h +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_slice_utils.h @@ -159,13 +159,19 @@ inline ShapeOrData SliceRawInferSymbolicShape( // Currently, we DO NOT support the case that any element in `axes` `starts` // or `ends` is a Symbol. auto vec_int64 = details::VecExpr2Int64(starts); - IR_ENFORCE(vec_int64.has_value(), - "for slice op, all the elements in `starts` must be int64_t"); + PADDLE_ENFORCE_EQ( + vec_int64.has_value(), + true, + phi::errors::InvalidArgument( + "for slice op, all the elements in `starts` must be int64_t")); std::vector starts_int = vec_int64.value(); vec_int64 = details::VecExpr2Int64(ends); - IR_ENFORCE(vec_int64.has_value(), - "for slice op, all the elements in `ends` must be int64_t"); + PADDLE_ENFORCE_EQ( + vec_int64.has_value(), + true, + phi::errors::InvalidArgument( + "for slice op, all the elements in `ends` must be int64_t")); std::vector ends_int = vec_int64.value(); const int64_t start = diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.cc index 3a1c411caf1b3..32413ba05bbfb 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.cc +++ 
b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.cc @@ -33,8 +33,6 @@ bool BicubicInterpOpInferSymbolicShape( int out_d = attributes.at("out_d").dyn_cast().data(); int out_h = attributes.at("out_h").dyn_cast().data(); int out_w = attributes.at("out_w").dyn_cast().data(); - const std::vector &scale = - paddle::dialect::details::GetVectorAttr(op, "scale"); std::vector size_tensor; if (out_d != -1) size_tensor.push_back(out_d); @@ -261,7 +259,7 @@ bool ConcatOpInferSymbolicShape( bool FullWithTensorOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - pir::Value operand_source = op->operand_source(0); + pir::Value operand_source = op->operand_source(1); const symbol::ShapeOrDataDimExprs &operand_shape_or_data = shape_analysis->GetShapeOrDataForValue(operand_source); diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc index cdbb016158b23..1fab33c350f0e 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc @@ -330,8 +330,41 @@ bool MinOpInferSymbolicShape(pir::Operation *op, bool PadOpInferSymbolicShape(pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - PADDLE_THROW(phi::errors::Unimplemented( - op->name() + " 's InferSymbolicShape interface is NOT implemented now.")); + // input(0): Tensor x + const auto &x_shape_or_data = + shape_analysis->GetShapeOrDataForValue(op->operand_source(0)); + PADDLE_ENFORCE_EQ(x_shape_or_data.data().has_value(), + false, + phi::errors::InvalidArgument( + "InferSymbolicShape of PadOp only support input with " + "value now.")); + const auto &x_dims_sym = x_shape_or_data.shape(); + const size_t rank = x_dims_sym.size(); + + // input(1): int[] paddings + std::vector paddings = + 
paddle::dialect::details::GetVectorAttr(op, "paddings"); + PADDLE_ENFORCE_EQ(rank * 2, + paddings.size(), + phi::errors::InvalidArgument( + "The size of paddings should be 2 * input's rank. But " + "got paddings.size() = %d, input's rank = %d.", + paddings.size(), + rank)); + + // output + const auto &out_dims = [&] { + std::vector out_dims; + out_dims.reserve(rank); + for (size_t i = 0; i < rank; ++i) { + out_dims.push_back(x_dims_sym[i] + paddings[2 * i] + paddings[2 * i + 1]); + } + return out_dims; + }(); + + shape_analysis->SetShapeOrDataForValue( + op->result(0), symbol::TensorShapeOrDataDimExprs(out_dims)); + return true; } @@ -587,6 +620,8 @@ bool SplitOpInferSymbolicShape(pir::Operation *op, .dyn_cast() .data() .to(); + size_t rank = x_dims_sym.size(); + axis = axis >= 0 ? axis : std::max(int64_t(0), int64_t(axis + rank)); // sections const std::vector §ions_sym = [&] { @@ -880,10 +915,13 @@ bool Transpose_OpInferSymbolicShape( bool SqueezeOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - IR_ENFORCE(op->num_operands() == 2, - "SqueezeOpInferSymbolicShape ONLY support num_operands() == 2 " - "now, but got %d operands", - op->num_operands()); + PADDLE_ENFORCE_EQ( + op->num_operands(), + 2, + phi::errors::InvalidArgument( + "SqueezeOpInferSymbolicShape ONLY support num_operands() == 2 " + "now, but got %d operands", + op->num_operands())); auto x_shape_or_data = shape_analysis->GetShapeOrDataForValue(op->operand_source(0)); @@ -906,10 +944,13 @@ bool SqueezeOpInferSymbolicShape( std::vector squeeze_dims; for (auto squeeze_dim : squeeze_dims_sym) { - IR_ENFORCE(squeeze_dim.Has(), - "in SqueezeOpInferSymbolicShape, axes must be known int type, " - "but got: %s", - symbol::ToString(squeeze_dim)); + PADDLE_ENFORCE_EQ( + squeeze_dim.Has(), + true, + phi::errors::InvalidArgument( + "in SqueezeOpInferSymbolicShape, axes must be known int type, " + "but got: %s", + symbol::ToString(squeeze_dim))); squeeze_dims.emplace_back( 
static_cast(squeeze_dim.Get())); } @@ -972,31 +1013,186 @@ bool Squeeze_OpInferSymbolicShape( bool UnbindOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - PADDLE_THROW(phi::errors::Unimplemented( - op->name() + " 's InferSymbolicShape interface is NOT implemented now.")); + // input + const auto &x_shape_or_data = + shape_analysis->GetShapeOrDataForValue(op->operand_source(0)); + PADDLE_ENFORCE_EQ( + x_shape_or_data.data().has_value(), + false, + phi::errors::InvalidArgument( + "InferSymbolicShape of UnbindOp only support input with " + "value now.")); + const auto &x_dims_sym = x_shape_or_data.shape(); + + // axis + int axis = op->attributes().at("axis").dyn_cast().data(); + int rank = x_dims_sym.size(); + axis = axis >= 0 ? axis : axis + rank; + + // output + const symbol::TensorListShapeOrDataDimExprs &output_shape_data_list = [&] { + symbol::TensorListShapeOrDataDimExprs shape_data_list; + std::vector output_dims_sym = x_dims_sym; + + const symbol::DimExpr &unbound_dim = x_dims_sym.at(axis); + PADDLE_ENFORCE_EQ(unbound_dim.isa(), + true, + phi::errors::InvalidArgument( + "InferSymbolicShape of UnbindOp only support unbound " + "dim with constant length!")); + output_dims_sym.erase(output_dims_sym.begin() + axis); + const int64_t unbound_dim_length = unbound_dim.dyn_cast(); + + for (uint32_t idx = 0; idx < unbound_dim_length; idx++) { + shape_data_list.push_back( + symbol::TensorShapeOrDataDimExprs(output_dims_sym)); + } + return shape_data_list; + }(); + + shape_analysis->SetShapeOrDataForValue( + op->result(0), symbol::ShapeOrDataDimExprs{output_shape_data_list}); + return true; } bool UniqueOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - PADDLE_THROW(phi::errors::Unimplemented( - op->name() + " 's InferSymbolicShape interface is NOT implemented now.")); + const auto &x_shape_or_data = + shape_analysis->GetShapeOrDataForValue(op->operand_source(0)); + PADDLE_ENFORCE_EQ( + 
x_shape_or_data.data().has_value(), + false, + phi::errors::InvalidArgument( + "InferSymbolicShape of UniqueOp only support input with " + "value now.")); + const auto &x_dims_sym = x_shape_or_data.shape(); + const size_t rank = x_dims_sym.size(); + std::vector axes = + paddle::dialect::details::GetVectorAttr(op, "axis"); + + symbol::DimExpr unique_dim_sym = + shape_analysis->GetNextSymName(); // unknown until runtime + + const std::vector &counts_dims = [&] { + std::vector out_dims; + out_dims.push_back(unique_dim_sym); + return out_dims; + }(); + + const std::vector &index_dims = counts_dims; + + const std::vector &out_dims = [&] { + if (axes.empty()) { + return counts_dims; + } + std::vector out_dims = x_dims_sym; + int axis = axes[0]; + axis = axis >= 0 ? axis : axis + rank; + out_dims[axis] = unique_dim_sym; + return out_dims; + }(); + + const std::vector &inverse_dims = [&] { + std::vector inverse_dims; + if (axes.empty()) { + // flatten before unique + symbol::DimExpr product{1}; + for (const auto &x_dim : x_dims_sym) { + product = product * x_dim; + } + inverse_dims.push_back(product); + } else { + int axis = axes[0]; + axis = axis >= 0 ? 
axis : axis + rank; + inverse_dims.push_back(x_dims_sym[axis]); + } + return inverse_dims; + }(); + + shape_analysis->SetShapeOrDataForValue( + op->result(0), symbol::TensorShapeOrDataDimExprs{out_dims}); + shape_analysis->SetShapeOrDataForValue( + op->result(1), symbol::TensorShapeOrDataDimExprs{index_dims}); + shape_analysis->SetShapeOrDataForValue( + op->result(2), symbol::TensorShapeOrDataDimExprs{inverse_dims}); + shape_analysis->SetShapeOrDataForValue( + op->result(3), symbol::TensorShapeOrDataDimExprs{counts_dims}); + return true; } bool UniqueConsecutiveOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - PADDLE_THROW(phi::errors::Unimplemented( - op->name() + " 's InferSymbolicShape interface is NOT implemented now.")); + const auto &x_shape_or_data = + shape_analysis->GetShapeOrDataForValue(op->operand_source(0)); + PADDLE_ENFORCE_EQ( + x_shape_or_data.data().has_value(), + false, + phi::errors::InvalidArgument( + "InferSymbolicShape of UniqueConsecutiveOp only support input with " + "value now.")); + const auto &x_dims_sym = x_shape_or_data.shape(); + const size_t rank = x_dims_sym.size(); + std::vector axes = + paddle::dialect::details::GetVectorAttr(op, "axis"); + + symbol::DimExpr unique_dim_sym = + shape_analysis->GetNextSymName(); // unknown until runtime + + const std::vector &counts_dims = [&] { + std::vector out_dims; + out_dims.push_back(unique_dim_sym); + return out_dims; + }(); + + const std::vector &out_dims = [&] { + if (axes.empty()) { + return counts_dims; + } + std::vector out_dims = x_dims_sym; + int axis = axes[0]; + axis = axis >= 0 ? 
axis : axis + rank; + out_dims[axis] = unique_dim_sym; + return out_dims; + }(); + + const std::vector &inverse_dims = [&] { + std::vector inverse_dims; + if (axes.empty()) { + // flatten before unique + symbol::DimExpr product{1}; + for (const auto &x_dim : x_dims_sym) { + product = product * x_dim; + } + inverse_dims.push_back(product); + } else { + int axis = axes[0]; + axis = axis >= 0 ? axis : axis + rank; + inverse_dims.push_back(x_dims_sym[axis]); + } + return inverse_dims; + }(); + + shape_analysis->SetShapeOrDataForValue( + op->result(0), symbol::TensorShapeOrDataDimExprs{out_dims}); + shape_analysis->SetShapeOrDataForValue( + op->result(1), symbol::TensorShapeOrDataDimExprs{inverse_dims}); + shape_analysis->SetShapeOrDataForValue( + op->result(2), symbol::TensorShapeOrDataDimExprs{counts_dims}); + return true; } bool UnsqueezeOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - IR_ENFORCE(op->num_operands() == 2, - "UnsqueezeOp InferSymbolicShape ONLY support num_operands() == 2 " - "now, but got %d operands", - op->num_operands()); + PADDLE_ENFORCE_EQ( + op->num_operands(), + 2, + phi::errors::InvalidArgument( + "UnsqueezeOp InferSymbolicShape ONLY support num_operands() == 2 " + "now, but got %d operands", + op->num_operands())); auto x_shape_or_data = shape_analysis->GetShapeOrDataForValue(op->operand_source(0)); @@ -1025,10 +1221,13 @@ bool UnsqueezeOpInferSymbolicShape( int cur_output_rank = x_dims_size; for (auto axis_expr : axes_sym) { - IR_ENFORCE(axis_expr.Has(), - "in UnsqueezeOpInferSymbolicShape, axes must be known int type, " - "but got: %s", - symbol::ToString(axis_expr)); + PADDLE_ENFORCE_EQ( + axis_expr.Has(), + true, + phi::errors::InvalidArgument( + "in UnsqueezeOpInferSymbolicShape, axes must be known int type, " + "but got: %s", + symbol::ToString(axis_expr))); int axis = static_cast(axis_expr.Get()); int cur = axis < 0 ? 
axis + cur_output_rank + 1 : axis; diff --git a/paddle/fluid/pir/dialect/operator/interface/layout_transformation.cc b/paddle/fluid/pir/dialect/operator/interface/layout_transformation.cc new file mode 100644 index 0000000000000..c6c1401f32d5c --- /dev/null +++ b/paddle/fluid/pir/dialect/operator/interface/layout_transformation.cc @@ -0,0 +1,33 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/pir/dialect/operator/interface/layout_transformation.h" + +namespace paddle { +namespace dialect { + +template <> +common::DataLayout PreferLayoutImpl(pir::Operation* op) { + return common::DataLayout::NHWC; +} + +template <> +void RewriteByLayoutImpl(pir::Operation* op, + common::DataLayout new_layout) { + return; +} + +} // namespace dialect +} // namespace paddle +IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::LayoutTransformationInterface) diff --git a/paddle/fluid/pir/dialect/operator/interface/layout_transformation.h b/paddle/fluid/pir/dialect/operator/interface/layout_transformation.h new file mode 100644 index 0000000000000..71678029fb48c --- /dev/null +++ b/paddle/fluid/pir/dialect/operator/interface/layout_transformation.h @@ -0,0 +1,106 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/pir/dialect/operator/interface/layout_transformation.hpp" + +#include "paddle/common/enforce.h" +#include "paddle/common/layout.h" +#include "paddle/fluid/pir/dialect/operator/ir/op_type.h" +#include "paddle/fluid/pir/dialect/operator/utils/utils.h" +#include "paddle/phi/common/data_type.h" +#include "paddle/pir/include/core/op_base.h" +#include "paddle/pir/include/core/type_name.h" + +namespace paddle { +namespace dialect { + +class LayoutTransformationInterface + : public pir::OpInterfaceBase { + public: + using PreferLayoutFn = common::DataLayout (*)(pir::Operation*); + using RewriteByLayoutFn = void (*)(pir::Operation*, common::DataLayout); + using RelevantInputsFn = std::vector (*)(pir::Operation*); + using RelevantOutputsFn = std::vector (*)(pir::Operation*); + + struct Concept { + explicit Concept(PreferLayoutFn prefer_layout, + RewriteByLayoutFn rewrite_by_layout, + RelevantInputsFn relevant_inputs, + RelevantOutputsFn relevant_outputs) + : prefer_layout(prefer_layout), + rewrite_by_layout(rewrite_by_layout), + relevant_inputs(relevant_inputs), + relevant_outputs(relevant_outputs) {} + + PreferLayoutFn prefer_layout; + RewriteByLayoutFn rewrite_by_layout; + RelevantInputsFn relevant_inputs; + RelevantOutputsFn relevant_outputs; + }; + + template + struct Model : public Concept { + static common::DataLayout PreferLayoutModel(pir::Operation* op) { + return PreferLayoutImpl(op); + } + + static void RewriteByLayoutModel(pir::Operation* op, + common::DataLayout new_layout) { + RewriteByLayoutImpl(op, 
new_layout); + } + + static std::vector RelevantInputsModel(pir::Operation* op) { + return RelevantInputsImpl(op); + } + + static std::vector RelevantOutputsModel(pir::Operation* op) { + return RelevantOutputsImpl(op); + } + + Model() + : Concept(PreferLayoutModel, + RewriteByLayoutModel, + RelevantInputsModel, + RelevantOutputsModel) {} + }; + + LayoutTransformationInterface(pir::Operation* op, Concept* impl) + : pir::OpInterfaceBase(op), impl_(impl) {} + + common::DataLayout PreferLayout(pir::Operation* op) { + return impl_->prefer_layout(op); + } + + void RewriteByLayout(pir::Operation* op, common::DataLayout new_layout) { + impl_->rewrite_by_layout(op, new_layout); + } + + std::vector RelevantInputs(pir::Operation* op) { + return impl_->relevant_inputs(op); + } + + std::vector RelevantOutputs(pir::Operation* op) { + return impl_->relevant_outputs(op); + } + + private: + Concept* impl_; +}; + +} // namespace dialect +} // namespace paddle + +IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::LayoutTransformationInterface) diff --git a/paddle/fluid/pir/dialect/operator/interface/layout_transformation.hpp b/paddle/fluid/pir/dialect/operator/interface/layout_transformation.hpp new file mode 100644 index 0000000000000..c1860cbbac108 --- /dev/null +++ b/paddle/fluid/pir/dialect/operator/interface/layout_transformation.hpp @@ -0,0 +1,60 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "paddle/common/enforce.h" +#include "paddle/common/layout.h" +#include "paddle/phi/common/data_type.h" +#include "paddle/pir/include/core/operation.h" +#include "paddle/pir/include/core/type_name.h" + +namespace paddle { +namespace dialect { + +template +common::DataLayout PreferLayoutImpl(pir::Operation* op) { + return common::DataLayout::ALL_LAYOUT; +} + +template +void RewriteByLayoutImpl(pir::Operation* op, common::DataLayout new_layout) { + PADDLE_THROW(common::errors::Unimplemented( + "Op %s should have a specialized RewriteByLayout function", + pir::get_type_name())); +} + +template +std::vector RelevantInputsImpl(pir::Operation* op) { + return op->operands_source(); +} + +template +std::vector RelevantOutputsImpl(pir::Operation* op) { + return op->results(); +} + +class FusedConv2dAddActOp; +template <> +common::DataLayout PreferLayoutImpl(pir::Operation*); +extern template common::DataLayout PreferLayoutImpl( + pir::Operation*); +template <> +void RewriteByLayoutImpl(pir::Operation*, + common::DataLayout); +extern template void RewriteByLayoutImpl( + pir::Operation*, common::DataLayout); + +} // namespace dialect +} // namespace paddle diff --git a/paddle/fluid/pir/dialect/operator/ir/api_builder.cc b/paddle/fluid/pir/dialect/operator/ir/api_builder.cc index a86e19ccfe0a6..939f91154de5b 100644 --- a/paddle/fluid/pir/dialect/operator/ir/api_builder.cc +++ b/paddle/fluid/pir/dialect/operator/ir/api_builder.cc @@ -22,11 +22,17 @@ namespace dialect { ApiBuilder::ApiBuilder() : ctx_(pir::IrContext::Instance()), builder_(std::make_shared(ctx_)) { - IR_ENFORCE(builder_ != nullptr, "api builder construct error!"); + PADDLE_ENFORCE_NE( + builder_, + nullptr, + phi::errors::InvalidArgument("api builder construct error!")); } void ApiBuilder::SetProgram(pir::Program* program) { - IR_ENFORCE(program != nullptr, "argument of program is nullptr"); + PADDLE_ENFORCE_NE( + program, + nullptr, + phi::errors::InvalidArgument("argument of program 
is nullptr")); builder_->SetInsertionPointToBlockEnd(program->block()); } @@ -50,8 +56,10 @@ void ApiBuilder::SetParameter(const std::string& name, } void ApiBuilder::LoadInsertionPoint() { - IR_ENFORCE(!insertion_point_stack_.empty(), - "insertion_point_stack_ is empty."); + PADDLE_ENFORCE_EQ( + !insertion_point_stack_.empty(), + true, + phi::errors::InvalidArgument("insertion_point_stack_ is empty.")); builder_->set_insertion_point(insertion_point_stack_.top()); insertion_point_stack_.pop(); } diff --git a/paddle/fluid/pir/dialect/operator/ir/control_flow_op.cc b/paddle/fluid/pir/dialect/operator/ir/control_flow_op.cc index f674c35096018..a395f433a3b43 100644 --- a/paddle/fluid/pir/dialect/operator/ir/control_flow_op.cc +++ b/paddle/fluid/pir/dialect/operator/ir/control_flow_op.cc @@ -827,16 +827,27 @@ void HasElementsOp::Build(pir::Builder &builder, // NOLINT void HasElementsOp::VerifySig() { VLOG(4) << "Verifying inputs, outputs ,attributes for: HasElementsOp."; // Verify inputs: - IR_ENFORCE(num_operands() == 1u, "The size of inputs must equal to 1."); - IR_ENFORCE(operand_type(0).isa(), - "The first input of cf.has_elements must be container type."); + PADDLE_ENFORCE_EQ( + num_operands(), + 1u, + phi::errors::InvalidArgument("The size of inputs must equal to 1.")); + PADDLE_ENFORCE_EQ( + operand_type(0).isa(), + true, + phi::errors::InvalidArgument( + "The first input of cf.has_elements must be container type.")); // No attributes should be verify. 
// Verify outputs: - IR_ENFORCE(num_results() == 1u, "The size of outputs must be equal to 1."); - IR_ENFORCE((*this)->result_type(0).isa(), - "The type of cf.has_elements' output is not correct."); + PADDLE_ENFORCE_EQ( + num_results(), + 1u, + phi::errors::InvalidArgument("The size of outputs must be equal to 1.")); + PADDLE_ENFORCE_EQ((*this)->result_type(0).isa(), + true, + phi::errors::InvalidArgument( + "The type of cf.has_elements' output is not correct.")); } const char *AssertOp::attributes_name[1] = {"summarize"}; @@ -886,51 +897,69 @@ void AssertOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 2u, - "The size %d of inputs must be equal to 2.", - input_size); + PADDLE_ENFORCE_EQ( + input_size, + 2u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 2.", input_size)); if ((*this)->operand_source(0).type().isa()) { - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .dyn_cast() - .dtype() - .isa(), - "Type validation failed for the 0th input, it should be a " - "bool DenseTensorType."); + PADDLE_ENFORCE_EQ( + (*this) + ->operand_source(0) + .type() + .dyn_cast() + .dtype() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, it should be a " + "bool DenseTensorType.")); } if (auto vec_type = (*this)->operand(1).type().dyn_cast()) { for (size_t i = 0; i < vec_type.size(); ++i) { - IR_ENFORCE(vec_type[i].isa() || - vec_type[i].isa(), - "Type validation failed for the 1th input."); + PADDLE_ENFORCE_EQ( + vec_type[i].isa() || + vec_type[i].isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1th input.")); } } else { - IR_ENFORCE( + PADDLE_ENFORCE_EQ( (*this)->operand(1).type().isa() || (*this) ->operand(1) .type() .isa(), - "Type validation failed for the 1th input."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1th input.")); } } VLOG(4) << "Verifying attributes:"; { auto 
&attributes = this->attributes(); - IR_ENFORCE(attributes.count("summarize") > 0, "summarize does not exist."); - IR_ENFORCE(attributes.at("summarize").isa(), - "Type of attribute: summarize is not pir::Int64Attribute."); + PADDLE_ENFORCE_GT( + attributes.count("summarize"), + 0, + phi::errors::InvalidArgument("summarize does not exist.")); + PADDLE_ENFORCE_EQ( + attributes.at("summarize").isa(), + true, + phi::errors::InvalidArgument( + "Type of attribute: summarize is not pir::Int64Attribute.")); } VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 0u, - "The size %d of outputs must be equal to 0.", - output_size); + PADDLE_ENFORCE_EQ( + output_size, + 0u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 0.", output_size)); // Outputs num is 0, not need to check outputs type. } VLOG(4) << "End Verifying for: AssertOp."; @@ -941,74 +970,104 @@ void SelectInputOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto in_size = num_operands(); - IR_ENFORCE(in_size == 3u, "Size %d of inputs must be 3.", in_size); + PADDLE_ENFORCE_EQ( + in_size, + 3u, + phi::errors::InvalidArgument("Size %d of inputs must be 3.", in_size)); auto input1 = (*this)->operand_source(1).type(); auto input2 = (*this)->operand_source(2).type(); if (input1.isa() && input2.isa()) { auto tensor1 = input1.dyn_cast(); auto tensor2 = input2.dyn_cast(); - IR_ENFORCE( - tensor1.dtype() == tensor2.dtype(), - "The 1st input dtype %s should be equal to 2ed input dtype %s.", + PADDLE_ENFORCE_EQ( tensor1.dtype(), - tensor2.dtype()); - IR_ENFORCE(tensor1.data_layout() == tensor2.data_layout(), - "The 1st input data_layout %s should be equal to 2ed input " - "data_layout %s.", - tensor1.data_layout(), - tensor2.data_layout()); - IR_ENFORCE(tensor1.lod() == tensor2.lod(), - "The 1st input lod %s should be equal to 2ed input lod %s.", - tensor1.lod(), - tensor2.lod()); - IR_ENFORCE( - tensor1.offset() == tensor2.offset(), - "The 1st input 
offset %s should be equal to 2ed input offset %s.", + tensor2.dtype(), + phi::errors::InvalidArgument( + "The 1st input dtype %s should be equal to 2ed input dtype %s.", + tensor1.dtype(), + tensor2.dtype())); + PADDLE_ENFORCE_EQ( + tensor1.data_layout(), + tensor2.data_layout(), + phi::errors::InvalidArgument( + "The 1st input data_layout %s should be equal to 2ed input " + "data_layout %s.", + tensor1.data_layout(), + tensor2.data_layout())); + PADDLE_ENFORCE_EQ( + tensor1.lod(), + tensor2.lod(), + phi::errors::InvalidArgument( + "The 1st input lod %s should be equal to 2ed input lod %s.", + tensor1.lod(), + tensor2.lod())); + PADDLE_ENFORCE_EQ( tensor1.offset(), - tensor2.offset()); + tensor2.offset(), + phi::errors::InvalidArgument( + "The 1st input offset %s should be equal to 2ed input offset %s.", + tensor1.offset(), + tensor2.offset())); } else if (input1.isa() && input2.isa()) { auto tensor1 = input1.dyn_cast(); auto tensor2 = input1.dyn_cast(); - IR_ENFORCE( - tensor1.dtype() == tensor2.dtype(), - "The 1st input dtype %s should be equal to 2ed input dtype %s.", + PADDLE_ENFORCE_EQ( tensor1.dtype(), - tensor2.dtype()); - IR_ENFORCE(tensor1.data_layout() == tensor2.data_layout(), - "The 1st input data_layout %s should be equal to 2ed input " - "data_layout %s.", - tensor1.data_layout(), - tensor2.data_layout()); - IR_ENFORCE(tensor1.lod() == tensor2.lod(), - "The 1st input lod %s should be equal to 2ed input lod %s.", - tensor1.lod(), - tensor2.lod()); - IR_ENFORCE( - tensor1.offset() == tensor2.offset(), - "The 1st input offset %s should be equal to 2ed input offset %s.", + tensor2.dtype(), + phi::errors::InvalidArgument( + "The 1st input dtype %s should be equal to 2ed input dtype %s.", + tensor1.dtype(), + tensor2.dtype())); + PADDLE_ENFORCE_EQ( + tensor1.data_layout(), + tensor2.data_layout(), + phi::errors::InvalidArgument( + "The 1st input data_layout %s should be equal to 2ed input " + "data_layout %s.", + tensor1.data_layout(), + 
tensor2.data_layout())); + PADDLE_ENFORCE_EQ( + tensor1.lod(), + tensor2.lod(), + phi::errors::InvalidArgument( + "The 1st input lod %s should be equal to 2ed input lod %s.", + tensor1.lod(), + tensor2.lod())); + PADDLE_ENFORCE_EQ( tensor1.offset(), - tensor2.offset()); - IR_ENFORCE( - tensor1.place() == tensor2.place(), - "The 1st input place %s should be equal to 2ed input place %s.", + tensor2.offset(), + phi::errors::InvalidArgument( + "The 1st input offset %s should be equal to 2ed input offset %s.", + tensor1.offset(), + tensor2.offset())); + PADDLE_ENFORCE_EQ( tensor1.place(), - tensor2.place()); + tensor2.place(), + phi::errors::InvalidArgument( + "The 1st input place %s should be equal to 2ed input place %s.", + tensor1.place(), + tensor2.place())); } else { - IR_ENFORCE(input1 == input2, - "The 1st input type %s should be equal to 2ed input type %s.", - input1, - input2); + PADDLE_ENFORCE_EQ( + input1, + input2, + phi::errors::InvalidArgument( + "The 1st input type %s should be equal to 2ed input type %s.", + input1, + input2)); } } VLOG(4) << "Verifying outputs:"; { auto out_size = num_results(); - IR_ENFORCE( - out_size == 1u, "Size %d of outputs must be equal to 1.", out_size); + PADDLE_ENFORCE_EQ(out_size, + 1u, + phi::errors::InvalidArgument( + "Size %d of outputs must be equal to 1.", out_size)); } VLOG(4) << "End Verifying for: AssignArray_Op."; } @@ -1061,13 +1120,18 @@ void SelectOutputOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto in_size = num_operands(); - IR_ENFORCE(in_size == 2u, "Size %d of inputs must be 2.", in_size); + PADDLE_ENFORCE_EQ( + in_size, + 2u, + phi::errors::InvalidArgument("Size %d of inputs must be 2.", in_size)); } VLOG(4) << "Verifying outputs:"; { auto out_size = num_results(); - IR_ENFORCE( - out_size == 2u, "Size %d of outputs must be equal to 2.", out_size); + PADDLE_ENFORCE_EQ(out_size, + 2u, + phi::errors::InvalidArgument( + "Size %d of outputs must be equal to 2.", out_size)); auto out1 = 
(*this)->result(0).type(); auto out2 = (*this)->result(1).type(); @@ -1075,58 +1139,83 @@ void SelectOutputOp::VerifySig() { out2.isa()) { auto tensor1 = out1.dyn_cast(); auto tensor2 = out2.dyn_cast(); - IR_ENFORCE( - tensor1.dtype() == tensor2.dtype(), - "The 1st input dtype %s should be equal to 2ed input dtype %s.", + PADDLE_ENFORCE_EQ( tensor1.dtype(), - tensor2.dtype()); - IR_ENFORCE(tensor1.data_layout() == tensor2.data_layout(), - "The 1st input data_layout %s should be equal to 2ed input " - "data_layout %s.", - tensor1.data_layout(), - tensor2.data_layout()); - IR_ENFORCE(tensor1.lod() == tensor2.lod(), - "The 1st input lod %s should be equal to 2ed input lod %s.", - tensor1.lod(), - tensor2.lod()); - IR_ENFORCE( - tensor1.offset() == tensor2.offset(), - "The 1st input offset %s should be equal to 2ed input offset %s.", + tensor2.dtype(), + phi::errors::InvalidArgument( + "The 1st input dtype %s should be equal to 2ed input dtype %s.", + tensor1.dtype(), + tensor2.dtype())); + PADDLE_ENFORCE_EQ( + tensor1.data_layout(), + tensor2.data_layout(), + phi::errors::InvalidArgument( + "The 1st input data_layout %s should be equal to 2ed input " + "data_layout %s.", + tensor1.data_layout(), + tensor2.data_layout())); + PADDLE_ENFORCE_EQ( + tensor1.lod(), + tensor2.lod(), + phi::errors::InvalidArgument( + "The 1st input lod %s should be equal to 2ed input lod %s.", + tensor1.lod(), + tensor2.lod())); + PADDLE_ENFORCE_EQ( tensor1.offset(), - tensor2.offset()); + tensor2.offset(), + phi::errors::InvalidArgument( + "The 1st input offset %s should be equal to 2ed input offset %s.", + tensor1.offset(), + tensor2.offset())); } else if (out1.isa() && out2.isa()) { auto tensor1 = out1.dyn_cast(); auto tensor2 = out2.dyn_cast(); - IR_ENFORCE( - tensor1.dtype() == tensor2.dtype(), - "The 1st input dtype %s should be equal to 2ed input dtype %s.", + PADDLE_ENFORCE_EQ( tensor1.dtype(), - tensor2.dtype()); - IR_ENFORCE(tensor1.data_layout() == tensor2.data_layout(), - "The 1st 
input data_layout %s should be equal to 2ed input " - "data_layout %s.", - tensor1.data_layout(), - tensor2.data_layout()); - IR_ENFORCE(tensor1.lod() == tensor2.lod(), - "The 1st input lod %s should be equal to 2ed input lod %s.", - tensor1.lod(), - tensor2.lod()); - IR_ENFORCE( - tensor1.offset() == tensor2.offset(), - "The 1st input offset %s should be equal to 2ed input offset %s.", + tensor2.dtype(), + phi::errors::InvalidArgument( + "The 1st input dtype %s should be equal to 2ed input dtype %s.", + tensor1.dtype(), + tensor2.dtype())); + PADDLE_ENFORCE_EQ( + tensor1.data_layout(), + tensor2.data_layout(), + phi::errors::InvalidArgument( + "The 1st input data_layout %s should be equal to 2ed input " + "data_layout %s.", + tensor1.data_layout(), + tensor2.data_layout())); + PADDLE_ENFORCE_EQ( + tensor1.lod(), + tensor2.lod(), + phi::errors::InvalidArgument( + "The 1st input lod %s should be equal to 2ed input lod %s.", + tensor1.lod(), + tensor2.lod())); + PADDLE_ENFORCE_EQ( tensor1.offset(), - tensor2.offset()); - IR_ENFORCE( - tensor1.place() == tensor2.place(), - "The 1st input place %s should be equal to 2ed input place %s.", + tensor2.offset(), + phi::errors::InvalidArgument( + "The 1st input offset %s should be equal to 2ed input offset %s.", + tensor1.offset(), + tensor2.offset())); + PADDLE_ENFORCE_EQ( tensor1.place(), - tensor2.place()); + tensor2.place(), + phi::errors::InvalidArgument( + "The 1st input place %s should be equal to 2ed input place %s.", + tensor1.place(), + tensor2.place())); } else { - IR_ENFORCE(out1 == out2, - "The 1st input type %s should be equal to 2ed input type %s.", - out1, - out2); + PADDLE_ENFORCE_EQ( + out1, + out2, + phi::errors::InvalidArgument( + "The 1st input type %s should be equal to 2ed input type %s.", + out1, + out2)); } } VLOG(4) << "End Verifying for: AssignArray_Op."; diff --git a/paddle/fluid/pir/dialect/operator/ir/manual_onednn_op.cc b/paddle/fluid/pir/dialect/operator/ir/manual_onednn_op.cc index 
4e4b7f46b382c..17d9a1dadc903 100644 --- a/paddle/fluid/pir/dialect/operator/ir/manual_onednn_op.cc +++ b/paddle/fluid/pir/dialect/operator/ir/manual_onednn_op.cc @@ -124,16 +124,21 @@ void ExpandOp::Build(pir::Builder& builder, pir::AttributeMap attributes) { VLOG(4) << "Start build ExpandOp"; - IR_ENFORCE(attributes.find("shape") != attributes.end(), - "'shape' Attribute is expected for ExpandOp. "); + PADDLE_ENFORCE_NE(attributes.find("shape"), + attributes.end(), + phi::errors::InvalidArgument( + "'shape' Attribute is expected for ExpandOp. ")); std::vector shape = attributes.at("shape") .dyn_cast() .data() .GetData(); - IR_ENFORCE(attributes.find("mkldnn_data_type") != attributes.end(), - "'mkldnn_data_type' Attribute is expected for ExpandOp. "); + PADDLE_ENFORCE_NE( + attributes.find("mkldnn_data_type"), + attributes.end(), + phi::errors::InvalidArgument( + "'mkldnn_data_type' Attribute is expected for ExpandOp. ")); std::string mkldnn_data_type = attributes.at("mkldnn_data_type") .dyn_cast() .AsString(); @@ -190,48 +195,66 @@ void ExpandOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 2u, - "The size %d of inputs must be equal to 2.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, got %s.", - (*this)->operand_source(0).type()); + PADDLE_ENFORCE_EQ( + input_size, + 2u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 2.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, got %s.", + (*this)->operand_source(0).type())); if (auto vec_type = (*this)->operand_source(1).type().dyn_cast()) { for (size_t i = 0; i < vec_type.size(); ++i) { - IR_ENFORCE(vec_type[i].isa(), - "Type validation failed for the 1th input, got %s.", - (*this)->operand_source(1).type()); + PADDLE_ENFORCE_EQ( + 
vec_type[i].isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1th input, got %s.", + (*this)->operand_source(1).type())); } } else { - IR_ENFORCE((*this) - ->operand_source(1) - .type() - .isa(), - "Type validation failed for the 1th input, got %s.", - (*this)->operand_source(1).type()); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(1) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1th input, got %s.", + (*this)->operand_source(1).type())); } } VLOG(4) << "Verifying attributes:"; { auto& attributes = this->attributes(); - IR_ENFORCE(attributes.count("mkldnn_data_type") > 0, - "mkldnn_data_type does not exist."); - IR_ENFORCE(attributes.at("mkldnn_data_type").isa(), - "Type of attribute: mkldnn_data_type is not pir::StrAttribute."); + PADDLE_ENFORCE_GT( + attributes.count("mkldnn_data_type"), + 0, + phi::errors::InvalidArgument("mkldnn_data_type does not exist.")); + PADDLE_ENFORCE_EQ( + attributes.at("mkldnn_data_type").isa(), + true, + phi::errors::InvalidArgument( + "Type of attribute: mkldnn_data_type is not pir::StrAttribute.")); } VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 1u, - "The size %d of outputs must be equal to 1.", - output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: ExpandOp."; } @@ -248,9 +271,11 @@ std::vector ExpandOp::InferMeta( p_attributes, common::errors::Fatal( "AttrtibueMap pointer in InferMeta function is nullptr.")); - IR_ENFORCE(input_values.size() == 2, - "Num of inputs is expected to be 2 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 2, + 
phi::errors::InvalidArgument( + "Num of inputs is expected to be 2 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; pir::Value shape_ = input_values[1]; diff --git a/paddle/fluid/pir/dialect/operator/ir/manual_op.cc b/paddle/fluid/pir/dialect/operator/ir/manual_op.cc index c5dc4457b737e..07bf3a917d74e 100644 --- a/paddle/fluid/pir/dialect/operator/ir/manual_op.cc +++ b/paddle/fluid/pir/dialect/operator/ir/manual_op.cc @@ -148,9 +148,11 @@ std::vector AddNOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta AddNOp"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value inputs_ = input_values[0]; VLOG(4) << "Builder construction outputs"; @@ -294,9 +296,11 @@ std::vector AddN_Op::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta AddN_Op"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value inputs_ = input_values[0]; VLOG(4) << "Builder construction outputs"; @@ -444,9 +448,11 @@ std::vector AddNArrayOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta AddNArrayOp"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value inputs_ = input_values[0]; VLOG(4) << "Builder construction outputs"; pir::VectorType inputs = 
inputs_.type().dyn_cast(); @@ -670,9 +676,11 @@ std::vector FusedGemmEpilogueOp::InferMeta( "AttrtibueMap pointer in InferMeta function is nullptr.")); auto &attributes = *p_attributes; VLOG(4) << "Start infermeta FusedGemmEpilogueOp"; - IR_ENFORCE(input_values.size() == 3, - "Num of inputs is expected to be 3 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 3, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 3 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; pir::Value y_ = input_values[1]; pir::Value bias_ = input_values[2]; @@ -919,9 +927,11 @@ std::vector FusedGemmEpilogueGradOp::InferMeta( common::errors::Fatal( "AttrtibueMap pointer in InferMeta function is nullptr.")); auto &attributes = *p_attributes; - IR_ENFORCE(input_values.size() == 4, - "Num of inputs is expected to be 4 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 4, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 4 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; pir::Value y_ = input_values[1]; @@ -1218,9 +1228,11 @@ std::vector SplitGradOp::InferMeta( pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta SplitGradOp"; - IR_ENFORCE(input_values.size() == 2, - "Num of inputs is expected to be 2 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 2, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 2 but got %d.", + input_values.size())); pir::Value out_grad_ = input_values[0]; pir::Value axis_ = input_values[1]; @@ -1479,9 +1491,11 @@ std::vector CreateArrayLikeOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta CreateArrayLikeOp"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is 
expected to be 1 but got %d.", + input_values.size())); pir::Value input_ = input_values[0]; VLOG(4) << "Builder construction outputs"; @@ -1600,9 +1614,11 @@ std::vector ArrayLengthOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta ArrayLengthOp"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; paddle::dialect::DenseTensorArrayType x_type; @@ -1756,9 +1772,11 @@ std::vector ArrayReadOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta ArrayLengthOp"; - IR_ENFORCE(input_values.size() == 2, - "Num of inputs is expected to be 2 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 2, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 2 but got %d.", + input_values.size())); pir::Value array_ = input_values[0]; pir::Value i_ = input_values[1]; @@ -1924,9 +1942,11 @@ std::vector ArrayWrite_Op::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta ArrayWrite_Op"; - IR_ENFORCE(input_values.size() == 3, - "Num of inputs is expected to be 3 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 3, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 3 but got %d.", + input_values.size())); pir::Value array_ = input_values[0]; pir::Value x_ = input_values[1]; @@ -2121,17 +2141,23 @@ std::vector ArrayToTensorOp::InferMeta( "AttrtibueMap pointer in InferMeta function is nullptr.")); auto &attributes = *p_attributes; VLOG(4) << "Start infermeta ArrayToTensorOp"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + 
PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; - IR_ENFORCE(attributes.find("axis") != attributes.end(), - "'value' Attribute is expected for IncrementOp. "); + PADDLE_ENFORCE_NE(attributes.find("axis"), + attributes.end(), + phi::errors::InvalidArgument( + "'value' Attribute is expected for IncrementOp. ")); int32_t axis = attributes.at("axis").dyn_cast().data(); - IR_ENFORCE(attributes.find("use_stack") != attributes.end(), - "'value' Attribute is expected for IncrementOp. "); + PADDLE_ENFORCE_NE(attributes.find("use_stack"), + attributes.end(), + phi::errors::InvalidArgument( + "'value' Attribute is expected for IncrementOp. ")); bool use_stack = attributes.at("use_stack").dyn_cast().data(); @@ -2315,21 +2341,27 @@ std::vector TensorToArrayOp::InferMeta( "AttrtibueMap pointer in InferMeta function is nullptr.")); auto &attributes = *p_attributes; VLOG(4) << "Start infermeta TensorToArrayOp"; - IR_ENFORCE(input_values.size() == 2, - "Num of inputs is expected to be 2 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 2, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 2 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; pir::Value out_grad_ = input_values[1]; VLOG(4) << "Builder construction attributes"; pir::AttributeMap argument_attributes = {}; - IR_ENFORCE(attributes.find("axis") != attributes.end(), - "'value' Attribute is expected for IncrementOp. "); + PADDLE_ENFORCE_NE(attributes.find("axis"), + attributes.end(), + phi::errors::InvalidArgument( + "'value' Attribute is expected for IncrementOp. ")); int32_t axis = attributes.at("axis").dyn_cast().data(); - IR_ENFORCE(attributes.find("use_stack") != attributes.end(), - "'value' Attribute is expected for IncrementOp. 
"); + PADDLE_ENFORCE_NE(attributes.find("use_stack"), + attributes.end(), + phi::errors::InvalidArgument( + "'value' Attribute is expected for IncrementOp. ")); bool use_stack = attributes.at("use_stack").dyn_cast().data(); @@ -2430,39 +2462,53 @@ void SliceArrayOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 3u, - "The size %d of inputs must be equal to 3.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, got %s.", - (*this)->operand_source(0).type()); - IR_ENFORCE((*this)->operand_source(1).type().isa() || - (*this) - ->operand_source(1) - .type() - .isa(), - "Type validation failed for the 1st input, got %s.", - (*this)->operand_source(1).type()); - IR_ENFORCE((*this)->operand_source(2).type().isa() || - (*this) - ->operand_source(2) - .type() - .isa(), - "Type validation failed for the 1st input, got %s.", - (*this)->operand_source(2).type()); + PADDLE_ENFORCE_EQ( + input_size, + 3u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 3.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, got %s.", + (*this)->operand_source(0).type())); + PADDLE_ENFORCE_EQ( + (*this)->operand_source(1).type().isa() || + (*this) + ->operand_source(1) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1st input, got %s.", + (*this)->operand_source(1).type())); + PADDLE_ENFORCE_EQ( + (*this)->operand_source(2).type().isa() || + (*this) + ->operand_source(2) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1st input, got %s.", + (*this)->operand_source(2).type())); } VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 1u, - "The size %d of outputs must be equal to 1.", - 
output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: SliceArrayOp."; } @@ -2527,9 +2573,11 @@ std::vector SliceArrayOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta SliceArrayOp"; - IR_ENFORCE(input_values.size() == 3, - "Num of inputs is expected to be 3 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 3, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 3 but got %d.", + input_values.size())); pir::Value input = input_values[0]; pir::Value starts = input_values[1]; pir::Value ends = input_values[2]; @@ -2622,32 +2670,43 @@ void SliceArrayDenseOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 2u, - "The size %d of inputs must be equal to 2.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, got %s.", - (*this)->operand_source(0).type()); - IR_ENFORCE((*this)->operand_source(1).type().isa() || - (*this) - ->operand_source(1) - .type() - .isa(), - "Type validation failed for the 1st input, got %s.", - (*this)->operand_source(1).type()); + PADDLE_ENFORCE_EQ( + input_size, + 2u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 2.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, got %s.", + (*this)->operand_source(0).type())); + PADDLE_ENFORCE_EQ( + (*this)->operand_source(1).type().isa() || + (*this) + ->operand_source(1) + .type() + .isa(), + true, 
+ phi::errors::InvalidArgument( + "Type validation failed for the 1st input, got %s.", + (*this)->operand_source(1).type())); } VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 1u, - "The size %d of outputs must be equal to 1.", - output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: SliceArrayOp."; } @@ -2678,9 +2737,11 @@ std::vector SliceArrayDenseOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta SliceArrayDenseOp"; - IR_ENFORCE(input_values.size() == 2, - "Num of inputs is expected to be 2 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 2, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 2 but got %d.", + input_values.size())); pir::Value input = input_values[0]; pir::Value starts = input_values[1]; @@ -2772,15 +2833,19 @@ void AssignArrayOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 1u, - "The size %d of inputs must be equal to 1.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, got %s.", - (*this)->operand_source(0).type()); + PADDLE_ENFORCE_EQ( + input_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 1.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, got %s.", + (*this)->operand_source(0).type())); } VLOG(4) << "Verifying attributes:"; { @@ -2789,12 +2854,16 @@ void 
AssignArrayOp::VerifySig() { VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 1u, - "The size %d of outputs must be equal to 1.", - output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: AssignArrayOp."; } @@ -2817,9 +2886,11 @@ std::vector AssignArrayOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta AssignArrayOp"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; VLOG(4) << "Builder construction outputs"; @@ -2885,26 +2956,35 @@ void AssignArray_Op::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 1u, - "The size %d of inputs must be equal to 1.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, but got %s.", - (*this)->operand_source(0).type()); + PADDLE_ENFORCE_EQ( + input_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 1.", input_size)); + PADDLE_ENFORCE_EQ( + (*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, but got %s.", + (*this)->operand_source(0).type())); } VLOG(4) << "Verifying attributes:"; VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 1u, - "The size %d of outputs 
must be equal to 1.", - output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: AssignArray_Op."; } @@ -2918,9 +2998,11 @@ std::vector AssignArray_Op::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta AssignArray_Op"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; VLOG(4) << "Builder construction outputs"; @@ -3022,8 +3104,10 @@ void ExpandOp::Build(pir::Builder &builder, pir::AttributeMap attributes) { VLOG(4) << "Start build ExpandOp"; - IR_ENFORCE(attributes.find("shape") != attributes.end(), - "'shape' Attribute is expected for ExpandOp. "); + PADDLE_ENFORCE_NE(attributes.find("shape"), + attributes.end(), + phi::errors::InvalidArgument( + "'shape' Attribute is expected for ExpandOp. 
")); std::vector shape = attributes.at("shape") .dyn_cast() @@ -3114,10 +3198,13 @@ bool ExpandOp::InferSymbolicShape( if (expand_shape[i] == -1) { // copy the dim from x // the shape is right aligned int index = i - (expand_shape.size() - x_dims.size()); - IR_ENFORCE(index >= 0, - "in ExpandOpInferSymbolicShape, the dim to copy must >= 0, " - "but got %d", - index); + PADDLE_ENFORCE_GE( + index, + 0, + phi::errors::InvalidArgument( + "in ExpandOpInferSymbolicShape, the dim to copy must >= 0, " + "but got %d", + index)); out_shape[i] = x_dims[index]; } @@ -3136,26 +3223,34 @@ void ExpandOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 2u, - "The size %d of inputs must be equal to 2.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input."); + PADDLE_ENFORCE_EQ( + input_size, + 2u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 2.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input.")); if (auto vec_type = (*this)->operand_source(1).type().dyn_cast()) { for (size_t i = 0; i < vec_type.size(); ++i) { - IR_ENFORCE(vec_type[i].isa(), - "Type validation failed for the 1th input."); + PADDLE_ENFORCE_EQ(vec_type[i].isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1th input.")); } } else { - IR_ENFORCE((*this) - ->operand_source(1) - .type() - .isa(), - "Type validation failed for the 1th input."); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(1) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1th input.")); } } VLOG(4) << "Verifying attributes:"; @@ -3165,12 +3260,16 @@ void ExpandOp::VerifySig() { VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 1u, - "The size %d of 
outputs must be equal to 1.", - output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: ExpandOp."; } @@ -3184,9 +3283,11 @@ std::vector ExpandOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta ExpandOp"; - IR_ENFORCE(input_values.size() == 2, - "Num of inputs is expected to be 2 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 2, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 2 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; pir::Value shape_ = input_values[1]; @@ -3341,8 +3442,10 @@ void IncrementOp::Build(pir::Builder &builder, pir::AttributeMap attributes) { VLOG(4) << "Start build IncrementOp"; - IR_ENFORCE(attributes.find("value") != attributes.end(), - "'value' Attribute is expected for IncrementOp. "); + PADDLE_ENFORCE_NE(attributes.find("value"), + attributes.end(), + phi::errors::InvalidArgument( + "'value' Attribute is expected for IncrementOp. 
")); float value = attributes.at("value").dyn_cast().data(); VLOG(4) << "Builder construction inputs"; @@ -3367,32 +3470,45 @@ void IncrementOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 1u, - "The size %d of inputs must be equal to 1.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, got %s.", - (*this)->operand_source(0).type()); + PADDLE_ENFORCE_EQ( + input_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 1.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, got %s.", + (*this)->operand_source(0).type())); } VLOG(4) << "Verifying attributes:"; { auto &attributes = this->attributes(); - IR_ENFORCE(attributes.count("value") > 0, "value does not exist."); - IR_ENFORCE(attributes.at("value").isa(), - "Type of attribute: value is not pir::FloatAttribute."); + PADDLE_ENFORCE_GT(attributes.count("value"), + 0, + phi::errors::InvalidArgument("value does not exist.")); + PADDLE_ENFORCE_EQ( + attributes.at("value").isa(), + true, + phi::errors::InvalidArgument( + "Type of attribute: value is not pir::FloatAttribute.")); } VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 1u, - "The size %d of outputs must be equal to 1.", - output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: IncrementOp."; } @@ -3411,13 +3527,17 @@ std::vector IncrementOp::InferMeta( "AttrtibueMap pointer in InferMeta function is 
nullptr.")); auto &attributes = *p_attributes; VLOG(4) << "Start infermeta IncrementOp"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; - IR_ENFORCE(attributes.find("value") != attributes.end(), - "'value' Attribute is expected for IncrementOp. "); + PADDLE_ENFORCE_NE(attributes.find("value"), + attributes.end(), + phi::errors::InvalidArgument( + "'value' Attribute is expected for IncrementOp. ")); float value = attributes.at("value").dyn_cast().data(); VLOG(4) << "Builder construction outputs"; @@ -3526,8 +3646,10 @@ void Increment_Op::Build(pir::Builder &builder, pir::AttributeMap attributes) { VLOG(4) << "Start build Increment_Op"; - IR_ENFORCE(attributes.find("value") != attributes.end(), - "'value' Attribute is expected for Increment_Op. "); + PADDLE_ENFORCE_NE(attributes.find("value"), + attributes.end(), + phi::errors::InvalidArgument( + "'value' Attribute is expected for Increment_Op. 
")); float value = attributes.at("value").dyn_cast().data(); VLOG(4) << "Builder construction inputs"; @@ -3553,32 +3675,45 @@ void Increment_Op::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 1u, - "The size %d of inputs must be equal to 1.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, got %s.", - (*this)->operand_source(0).type()); + PADDLE_ENFORCE_EQ( + input_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 1.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, got %s.", + (*this)->operand_source(0).type())); } VLOG(4) << "Verifying attributes:"; { auto &attributes = this->attributes(); - IR_ENFORCE(attributes.count("value") > 0, "value does not exist."); - IR_ENFORCE(attributes.at("value").isa(), - "Type of attribute: value is not pir::FloatAttribute."); + PADDLE_ENFORCE_GT(attributes.count("value"), + 0, + phi::errors::InvalidArgument("value does not exist.")); + PADDLE_ENFORCE_EQ( + attributes.at("value").isa(), + true, + phi::errors::InvalidArgument( + "Type of attribute: value is not pir::FloatAttribute.")); } VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 1u, - "The size %d of outputs must be equal to 1.", - output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: Increment_Op."; } @@ -3597,13 +3732,17 @@ std::vector Increment_Op::InferMeta( "AttrtibueMap pointer in InferMeta function is 
nullptr.")); auto &attributes = *p_attributes; VLOG(4) << "Start infermeta Increment_Op"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; - IR_ENFORCE(attributes.find("value") != attributes.end(), - "'value' Attribute is expected for Increment_Op. "); + PADDLE_ENFORCE_NE(attributes.find("value"), + attributes.end(), + phi::errors::InvalidArgument( + "'value' Attribute is expected for Increment_Op. ")); float value = attributes.at("value").dyn_cast().data(); VLOG(4) << "Builder construction outputs"; @@ -3715,21 +3854,27 @@ void AssignOut_Op::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 2u, - "The size %d of inputs must be equal to 2.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, got %s.", - (*this)->operand_source(0).type()); - IR_ENFORCE((*this) - ->operand_source(1) - .type() - .isa(), - "Type validation failed for the 1th input, got %s.", - (*this)->operand_source(1).type()); + PADDLE_ENFORCE_EQ( + input_size, + 2u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 2.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, got %s.", + (*this)->operand_source(0).type())); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(1) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1th input, got %s.", + (*this)->operand_source(1).type())); } VLOG(4) << "Verifying attributes:"; { @@ -3738,12 +3883,16 @@ void AssignOut_Op::VerifySig() { VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - 
IR_ENFORCE(output_size == 1u, - "The size %d of outputs must be equal to 1.", - output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: AssignOut_Op."; } @@ -3756,9 +3905,11 @@ void AssignOut_Op::InferMeta(phi::InferMetaContext *infer_meta) { std::vector AssignOut_Op::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { - IR_ENFORCE(input_values.size() == 2, - "Num of inputs is expected to be 2 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 2, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 2 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; VLOG(4) << "Builder construction outputs"; @@ -3836,9 +3987,11 @@ std::vector ShapeBroadcastOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta ShapeBroadcastOp"; - IR_ENFORCE(input_values.size() == 2, - "Num of inputs is expected to be 2 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 2, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 2 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; pir::Value y_ = input_values[1]; @@ -3921,11 +4074,14 @@ symbol::DimExpr GetBroadcastDimExpr(const symbol::DimExpr &lhs, std::vector ComputeBroadcastShape( const std::vector &large_shape, const std::vector &small_shape) { - IR_ENFORCE(large_shape.size() >= small_shape.size(), - "Size of large_shape is expected to be greater or equal size of " - "small_shape, but got [%d] >= [%d].", - large_shape.size(), - small_shape.size()); + PADDLE_ENFORCE_GE( + large_shape.size(), + 
small_shape.size(), + phi::errors::InvalidArgument( + "Size of large_shape is expected to be greater or equal size of " + "small_shape, but got [%d] >= [%d].", + large_shape.size(), + small_shape.size())); std::vector output_data; output_data.reserve(large_shape.size()); auto rank_gap = large_shape.size() - small_shape.size(); @@ -3944,16 +4100,22 @@ bool ShapeBroadcastOp::InferSymbolicShape( pir::Value x = operand_source(0); pir::Value y = operand_source(1); - IR_ENFORCE(shape_analysis->HasShapeOrDataForValue(x) > 0, - "Value x does not exist."); - IR_ENFORCE(shape_analysis->HasShapeOrDataForValue(y) > 0, - "Value y does not exist."); + PADDLE_ENFORCE_GT(shape_analysis->HasShapeOrDataForValue(x), + 0, + phi::errors::InvalidArgument("Value x does not exist.")); + PADDLE_ENFORCE_GT(shape_analysis->HasShapeOrDataForValue(y), + 0, + phi::errors::InvalidArgument("Value y does not exist.")); const auto &x_data_shape = shape_analysis->GetShapeOrDataForValue(x); const auto &y_data_shape = shape_analysis->GetShapeOrDataForValue(y); - IR_ENFORCE(x_data_shape.data().has_value(), - "Value x comes from ShapeOp, it must have data"); - IR_ENFORCE(y_data_shape.data().has_value(), - "Value y comes from ShapeOp, it must have data"); + PADDLE_ENFORCE_EQ(x_data_shape.data().has_value(), + true, + phi::errors::InvalidArgument( + "Value x comes from ShapeOp, it must have data")); + PADDLE_ENFORCE_EQ(y_data_shape.data().has_value(), + true, + phi::errors::InvalidArgument( + "Value y comes from ShapeOp, it must have data")); const auto &x_data = x_data_shape.data().value(); const auto &y_data = y_data_shape.data().value(); @@ -4005,34 +4167,48 @@ void MemcpyD2hMultiIoOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 1u, - "The size %d of inputs must be equal to 1.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, got %s.", - 
(*this)->operand_source(0).type()); + PADDLE_ENFORCE_EQ( + input_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 1.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, got %s.", + (*this)->operand_source(0).type())); } VLOG(4) << "Verifying attributes:"; { auto &attributes = this->attributes(); - IR_ENFORCE(attributes.count("dst_place_type") > 0, - "dst_place_type does not exist."); - IR_ENFORCE(attributes.at("dst_place_type").isa(), - "Type of attribute: dst_place_type is not pir::Int32Attribute."); + PADDLE_ENFORCE_GT( + attributes.count("dst_place_type"), + 0, + phi::errors::InvalidArgument("dst_place_type does not exist.")); + PADDLE_ENFORCE_EQ( + attributes.at("dst_place_type").isa(), + true, + phi::errors::InvalidArgument( + "Type of attribute: dst_place_type is not pir::Int32Attribute.")); } VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 1u, - "The size %d of outputs must be equal to 1.", - output_size); + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); auto output_0_type = (*this)->result(0).type(); - IR_ENFORCE(output_0_type.isa(), - "Type validation failed for the 0th output."); + PADDLE_ENFORCE_EQ( + output_0_type.isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: MemcpyD2hMultiIoOp."; } @@ -4045,9 +4221,11 @@ void MemcpyD2hMultiIoOp::InferMeta(phi::InferMetaContext *infer_meta) { std::vector MemcpyD2hMultiIoOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of 
inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; VLOG(4) << "Builder construction outputs"; @@ -4130,35 +4308,50 @@ void ArrayPopOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 1u, - "The size %d of inputs must be equal to 1.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, got %s.", - (*this)->operand_source(0).type()); + PADDLE_ENFORCE_EQ( + input_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 1.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, got %s.", + (*this)->operand_source(0).type())); } VLOG(4) << "Verifying attributes:"; { auto &attributes = this->attributes(); - IR_ENFORCE(attributes.count("index") > 0, "index does not exist."); - IR_ENFORCE(attributes.at("index").isa(), - "Type of attribute: index is not pir::Int32Attribute."); + PADDLE_ENFORCE_GT(attributes.count("index"), + 0, + phi::errors::InvalidArgument("index does not exist.")); + PADDLE_ENFORCE_EQ( + attributes.at("index").isa(), + true, + phi::errors::InvalidArgument( + "Type of attribute: index is not pir::Int32Attribute.")); } VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 2u, - "The size %d of outputs must be equal to 2.", - output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 2u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 2.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); - IR_ENFORCE( + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); + PADDLE_ENFORCE_EQ( (*this)->result(1).type().isa(), - "Type validation failed for the 1st output."); + 
true, + phi::errors::InvalidArgument( + "Type validation failed for the 1st output.")); } VLOG(4) << "End Verifying for: ArrayPopOp."; } @@ -4198,9 +4391,11 @@ std::vector ArrayPopOp::InferMeta( "AttrtibueMap pointer in InferMeta function is nullptr.")); auto &attributes = *p_attributes; VLOG(4) << "Start infermeta ArrayPopOp"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value input = input_values[0]; VLOG(4) << "Builder construction outputs"; @@ -4213,8 +4408,10 @@ std::vector ArrayPopOp::InferMeta( "paddle::dialect::AllocatedDenseTensorArrayType")); } - IR_ENFORCE(attributes.find("index") != attributes.end(), - "'index' Attribute is expected for ArrayPopOp. "); + PADDLE_ENFORCE_NE(attributes.find("index"), + attributes.end(), + phi::errors::InvalidArgument( + "'index' Attribute is expected for ArrayPopOp. 
")); int index = attributes.at("index").dyn_cast().data(); paddle::dialect::IrTensor dense_input( diff --git a/paddle/fluid/pir/dialect/operator/ir/op_attribute.cc b/paddle/fluid/pir/dialect/operator/ir/op_attribute.cc index 2f4c9a2b7e504..8a843a8881734 100644 --- a/paddle/fluid/pir/dialect/operator/ir/op_attribute.cc +++ b/paddle/fluid/pir/dialect/operator/ir/op_attribute.cc @@ -30,7 +30,7 @@ phi::DataLayout DataLayoutAttribute::data() const { return storage()->GetAsKey(); } -phi::Scalar ScalarAttribute::data() { +phi::Scalar ScalarAttribute::data() const { if (isa()) { return phi::Scalar(dyn_cast().data()); } else if (isa()) { diff --git a/paddle/fluid/pir/dialect/operator/ir/op_attribute.h b/paddle/fluid/pir/dialect/operator/ir/op_attribute.h index 153414c7ad0d0..b7a54d6ca58d2 100644 --- a/paddle/fluid/pir/dialect/operator/ir/op_attribute.h +++ b/paddle/fluid/pir/dialect/operator/ir/op_attribute.h @@ -37,6 +37,8 @@ class IntArrayAttribute : public pir::Attribute { static IntArrayAttribute Parse(pir::IrParser &parser); // NOLINT const phi::IntArray &data() const; + + static std::string name() { return "a_intarray"; } }; class ScalarAttribute : public pir::Attribute { @@ -59,7 +61,9 @@ class ScalarAttribute : public pir::Attribute { return TransToIrAttribute(scalar, ctx); } - phi::Scalar data(); + phi::Scalar data() const; + + static std::string name() { return "a_scalar"; } }; class DataTypeAttribute : public pir::Attribute { @@ -76,6 +80,8 @@ class DataTypeAttribute : public pir::Attribute { static DataTypeAttribute Parse(pir::IrParser &parser); // NOLINT phi::DataType data() const; + + static std::string name() { return "a_dtype"; } }; class PlaceAttribute : public pir::Attribute { @@ -91,6 +97,7 @@ class PlaceAttribute : public pir::Attribute { static PlaceAttribute Parse(pir::IrParser &parser); // NOLINT phi::Place data() const; + static std::string name() { return "a_place"; } }; class DataLayoutAttribute : public pir::Attribute { diff --git 
a/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc b/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc index f60bdd115cf36..fa9fccaba2701 100644 --- a/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc +++ b/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc @@ -46,9 +46,11 @@ struct CombineOpInferSymbolicShapeInterfaceModel const auto shape_data_list = [&] { symbol::TensorListShapeOrDataDimExprs shape_data_list; for (size_t i = 0; i < op->num_operands(); ++i) { - IR_ENFORCE(op->operand(i).type().dyn_cast(), - "Currently InferSymbolicShape of CombineOp only support " - "DenseTensorType."); + PADDLE_ENFORCE_NOT_NULL( + op->operand(i).type().dyn_cast(), + phi::errors::InvalidArgument( + "Currently InferSymbolicShape of CombineOp only support " + "DenseTensorType.")); shape_data_list.emplace_back( shape_analysis->GetShapeOrDataForValue(op->operand_source(i)) @@ -70,9 +72,11 @@ struct ConstantOpInferSymbolicShapeInterfaceModel : public InferSymbolicShapeInterface::Concept { static inline bool InferSymbolicShape( pir::Operation* op, pir::ShapeConstraintIRAnalysis* shape_analysis) { - IR_ENFORCE(op->result(0).type().dyn_cast(), - "Currently InferSymbolicShape of ConstantOp only support " - "DenseTensorType result."); + PADDLE_ENFORCE_NOT_NULL( + op->result(0).type().dyn_cast(), + phi::errors::InvalidArgument( + "Currently InferSymbolicShape of ConstantOp only support " + "DenseTensorType result.")); const std::vector out_dims = [op] { std::vector dims; diff --git a/paddle/fluid/pir/dialect/operator/ir/ops.yaml b/paddle/fluid/pir/dialect/operator/ir/ops.yaml index 4da4f54c3ac90..11ff0e8f47c90 100644 --- a/paddle/fluid/pir/dialect/operator/ir/ops.yaml +++ b/paddle/fluid/pir/dialect/operator/ir/ops.yaml @@ -1,7 +1,7 @@ # The operators included in this file are: # 1) Operators defined only in PIR, dynamic graphs do not exist; # 2) The definitions of static graphs and dynamic graphs are inconsistent, but the final definition plan has not yet been clarified. 
-# After the definition is clearly defined, migrate to paddle /fluid/pir/dialect/operator/ir/update_ops.yaml or paddle/phi/api/yaml/ops.yaml +# After the definition is clearly defined, migrate to paddle/fluid/pir/dialect/operator/ir/update_ops.yaml or paddle/phi/api/yaml/ops.yaml - op : adadelta_ args : (Tensor param, Tensor grad, Tensor avg_squared_grad, Tensor avg_squared_update, Tensor learning_rate, Tensor master_param, float rho, float epsilon, bool multi_precision) @@ -22,6 +22,7 @@ spmd_rule : ElementwiseBinaryInferSpmd kernel : func : add + data_type: x inplace : (x -> out) backward : add_grad data_transform : @@ -107,6 +108,14 @@ inplace : (output -> out) backward : assign_out__grad +- op : assign_pos + args : (Tensor x, Tensor cum_count, Tensor eff_num_len) + output : Tensor(out) + infer_meta : + func : AssignPosInferMeta + kernel : + func : assign_pos + - op : assign_value args : (int[] shape, DataType dtype, Scalar[] values, Place place = {}) output : Tensor(out) @@ -470,6 +479,24 @@ optional : in_accum, in_state, out_scale, out_accum, out_state inplace : (scale -> out_scale, in_accum -> out_accum, in_state -> out_state) +- op : dgc + args : (Tensor u, Tensor v, Tensor grad, Tensor param, Tensor current_step, Tensor nranks, float[] sparsity, float m=0.9, bool use_nesterov=true, float rampup_begin_step=0.0, float rampup_step=0.0, float regular_coeff=0.0, int regular_type=0) + output : Tensor(u_out), Tensor(v_out), Tensor(encode_grad), Tensor(grad_out), Tensor(k), Tensor(gather_buff) + kernel : + func : dgc + param : [u, v, grad, param, current_step, nranks, m, use_nesterov, sparsity, rampup_begin_step, rampup_step, regular_coeff, regular_type] + optional: param + +- op : dgc_momentum + args : (Tensor param, Tensor grad, Tensor velocity, Tensor learning_rate, Tensor master_param, Tensor current_step_tensor, Tensor nranks_tensor, float mu, bool use_nesterov=false, str regularization_method="", float regularization_coeff=0.0f, bool multi_precision=false, 
float rescale_grad=1.0f, float rampup_begin_step=-1.0f) + output : Tensor(param_out), Tensor(velocity_out), Tensor(master_param_out), Tensor(grad_out) + infer_meta : + func : DGCMomentumInferMeta + kernel : + func : dgc_momentum + data_type : param + optional : master_param, master_param_out + - op : disable_check_model_nan_inf args: (Tensor x, int flag = 0) output: Tensor(out) @@ -491,6 +518,16 @@ data_type : fpn_rois optional : rois_num, multi_level_rois_num +- op : distributed_fused_lamb + args : (Tensor[] param, Tensor[] grad, Tensor fp32_fused_param, Tensor fp32_fused_grad, Tensor fp16_fused_param, Tensor fp16_fused_grad, Tensor moment1, Tensor moment2, Tensor beta1pow, Tensor beta2pow, Tensor fused_param_offsets, Tensor fp32_shard_fused_param_offsets, Tensor fp16_shard_fused_param_offsets, Tensor param_info, Tensor param_order, Tensor learning_rate, Tensor global_scale, float beta1, float beta2, float epsilon, float max_global_grad_norm, float weight_decay, bool clip_after_allreduce, int[] ring_ids= {}, int acc_steps = 1, bool use_master_param_norm = true, bool use_master_acc_grad = true, bool is_grad_scaled_by_nranks = true, int64_t nranks = 1, bool use_hierarchical_allreduce = false) + output : Tensor(fp32_fused_param_out), Tensor(fp16_fused_param_out), Tensor(fp32_acc_fused_grad), Tensor(fp16_acc_fused_grad), Tensor(moment1_out), Tensor(moment2_out), Tensor(beta1pow_out), Tensor(beta2pow_out), Tensor[](param_out){param.size()}, Tensor(found_inf), Tensor(acc_step), Tensor(stop_update), Tensor(step) + kernel : + func : distributed_fused_lamb + data_type : DataType::FLOAT32 + param : [param, grad, fp32_fused_param, fp32_fused_grad, fp16_fused_param, fp16_fused_grad, moment1, moment2, beta1pow, beta2pow, fused_param_offsets, fp32_shard_fused_param_offsets, fp16_shard_fused_param_offsets, param_info, param_order, learning_rate, global_scale, acc_steps, beta1, beta2, epsilon, max_global_grad_norm, weight_decay, clip_after_allreduce, use_master_param_norm,
use_master_acc_grad, is_grad_scaled_by_nranks, use_hierarchical_allreduce, nranks, ring_ids] + optional : fp32_fused_param, fp32_fused_grad, fp16_fused_param, fp16_fused_grad, fp32_fused_param_out, fp16_fused_param_out, fp32_acc_fused_grad, fp16_acc_fused_grad, acc_step, stop_update + inplace : (fp32_fused_param -> fp32_fused_param_out), (fp16_fused_param -> fp16_fused_param_out), (moment1 -> moment1_out), (moment2 -> moment2_out), (beta1pow -> beta1pow_out), (beta2pow -> beta2pow_out), (param -> param_out) + - op : distributed_fused_lamb_init args : (Tensor[] param, Tensor[] grad, float beta1, float beta2, int[] apply_weight_decay, int alignment, int rank, int nranks) output : Tensor(fp32_fused_param), Tensor(fp32_fused_grad), Tensor(fp16_fused_param), Tensor(fp16_fused_grad), Tensor(moment1), Tensor(moment2), Tensor(beta1_pow), Tensor(beta2_pow), Tensor(fused_param_offsets), Tensor(fp32_shard_fused_param_offsets), Tensor(fp16_shard_fused_param_offsets), Tensor(param_info), Tensor(param_order), Tensor[](param_out){param.size()}, Tensor[](master_param_out){param.size()}, Tensor[](grad_out){grad.size()}, Tensor(global_scale), Tensor(step) @@ -661,6 +698,7 @@ args : (str name, int col) output : Tensor(out) interfaces : paddle::dialect::InferSymbolicShapeInterface + traits: pir::ImmutableLayoutTrait - op : fetch args : (Tensor x, str name, int col) @@ -671,7 +709,7 @@ kernel : func : fetch param : [x] - traits : pir::SideEffectTrait + traits : pir::SideEffectTrait, pir::ImmutableLayoutTrait interfaces : paddle::dialect::InferSymbolicShapeInterface - op : floor_divide @@ -748,7 +786,7 @@ skip_transform : x - op : full_with_tensor - args : (Tensor shape, Tensor value, DataType dtype=DataType::FLOAT32) + args : (Tensor value, IntArray shape, DataType dtype=DataType::FLOAT32) output: Tensor(out) infer_meta : func : FullWithTensorInferMeta diff --git a/paddle/fluid/pir/dialect/operator/ir/ops_backward.yaml b/paddle/fluid/pir/dialect/operator/ir/ops_backward.yaml index 
9ab68a7e52eb6..452b845a43a1a 100644 --- a/paddle/fluid/pir/dialect/operator/ir/ops_backward.yaml +++ b/paddle/fluid/pir/dialect/operator/ir/ops_backward.yaml @@ -27,6 +27,7 @@ spmd_rule : ElementwiseBinaryGradInferSpmd kernel : func : add_grad + data_type: out_grad no_need_buffer : x, y composite : add_grad(x, y, out_grad, axis, x_grad, y_grad) backward : add_double_grad diff --git a/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_util.h b/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_util.h index 86370dd0cc6c1..e8719d4adb73e 100644 --- a/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_util.h +++ b/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_util.h @@ -98,8 +98,9 @@ struct OpRunTimeInfo { std::vector skip_transform_inputs; pir::AttributeMap extra_args_default_value; std::vector data_format_tensors; - bool is_onednn_only; - bool dynamic_fallback; + bool is_onednn_only = false; + bool dynamic_fallback = false; + OpRunTimeInfo() = default; OpRunTimeInfo(const std::string& infer_meta_func, const std::vector& infer_meta_param, diff --git a/paddle/fluid/pir/drr/CMakeLists.txt b/paddle/fluid/pir/drr/CMakeLists.txt index b23774a431795..ded64839dc97f 100644 --- a/paddle/fluid/pir/drr/CMakeLists.txt +++ b/paddle/fluid/pir/drr/CMakeLists.txt @@ -87,7 +87,7 @@ if(WITH_CINN) set(DRR_SRCS ${DRR_SRCS} ${CINN_SOURCE_FILE}) endif() -if(WITH_MKLDNN) +if(WITH_ONEDNN) set(onednn_dialect_name onednn_op) set(pir_op_onednn_yaml ${PADDLE_BINARY_DIR}/paddle/fluid/pir/dialect/operator/ir/generated/onednn.parsed.yaml diff --git a/paddle/fluid/pir/drr/include/drr_pattern_context.h b/paddle/fluid/pir/drr/include/drr_pattern_context.h index b7755f659e85d..17090fb3e210a 100644 --- a/paddle/fluid/pir/drr/include/drr_pattern_context.h +++ b/paddle/fluid/pir/drr/include/drr_pattern_context.h @@ -129,10 +129,11 @@ class TEST_API DrrPatternContext { const Op& ResultOpPattern( const std::string& op_type, - const std::unordered_map& attributes = {}); + const 
std::unordered_map& attributes = {}, + const std::unordered_map& runtime_attributes = + {}); drr::Tensor& ResultTensorPattern(const std::string& name); - // void RequireEqual(const Attribute& first, const Attribute& second); void RequireEqual(const TensorShape& first, const TensorShape& second); void RequireEqual(const TensorDataType& first, const TensorDataType& second); void RequireNativeCall(const ConstraintFunction& custom_fn); @@ -157,34 +158,28 @@ class Op { const Tensor& arg2) const; TEST_API void operator()(const std::vector& args, const std::vector& outputs) const; - // const Tensor& operator()(const Tensor& arg0, const Tensor& arg1, const - // Tensor& arg2) const; const Tensor& operator()(const Tensor& arg0, const - // Tensor& arg1, const Tensor& arg2, const Tensor& arg3) const; const Tensor& - // operator()(const Tensor& arg0, const Tensor& arg1, const Tensor& arg2, - // const Tensor& arg3, const Tensor& arg4) const; static const char* prefix; private: Op(const std::string& op_type_name, + PatternGraph* pattern_graph, const std::unordered_map& attributes, - PatternGraph* pattern_graph) + const std::unordered_map& runtime_attributes = {}) : op_type_name_(op_type_name), + pattern_graph_(pattern_graph), attributes_(attributes), - pattern_graph_(pattern_graph) {} - - const std::unordered_map& attributes() const { - return attributes_; - } - - friend class DrrPatternContext; - friend class OpCall; + runtime_attributes_(runtime_attributes) {} std::string op_type_name_; - std::unordered_map attributes_; PatternGraph* pattern_graph_{nullptr}; + std::unordered_map attributes_; + std::unordered_map runtime_attributes_; thread_local static int64_t count; + + friend class DrrPatternContext; + friend class OpCall; }; class TEST_API Tensor { @@ -244,7 +239,8 @@ class TEST_API OpCall { : op_name_(op->op_type_name_), inputs_(inputs), outputs_(outputs), - attributes_(op->attributes_) {} + attributes_(op->attributes_), + runtime_attributes_(op->runtime_attributes_) {} 
const std::string& name() const { return op_name_; } @@ -256,18 +252,24 @@ class TEST_API OpCall { return attributes_; } + const std::unordered_map& runtime_attributes() const { + return runtime_attributes_; + } + private: std::string op_name_; std::vector inputs_; std::vector outputs_; std::unordered_map attributes_; + std::unordered_map runtime_attributes_; }; class TEST_API ResultPattern { public: - const drr::Op& Op( - const std::string& op_type, - const std::unordered_map& attributes = {}); + const drr::Op& + Op(const std::string& op_type, + const std::unordered_map& attributes = {}, + const std::unordered_map& runtime_attributes = {}); drr::Tensor& Tensor(const std::string& name); @@ -304,10 +306,44 @@ class TEST_API ResultPattern { Attribute VectorFloatAttr(const std::vector& value) const; + // {"bool", phi::DataType::BOOL}, + // {"uint8", phi::DataType::UINT8}, + // {"int8", phi::DataType::INT8}, + // {"uint16", phi::DataType::UINT16}, + // {"int16", phi::DataType::INT16}, + // {"uint32", phi::DataType::UINT32}, + // {"int32", phi::DataType::INT32}, + // {"uint64", phi::DataType::UINT64}, + // {"int64", phi::DataType::INT64}, + // {"float32", phi::DataType::FLOAT32}, + // {"complex64", phi::DataType::COMPLEX64}, + // {"complex128", phi::DataType::COMPLEX128}, + // {"Undefined", phi::DataType::UNDEFINED}, + // {"psting", phi::DataType::PSTRING}, + // {"float16", phi::DataType::FLOAT16}, + // {"bfloat16", phi::DataType::BFLOAT16}, + // {"float64", phi::DataType::FLOAT64}}; Attribute DataTypeAttr(const std::string& value) const; + // {"cpu", phi::CPUPlace{}}, + // {"gpu", phi::GPUPlace{}}, + // {"gpu_pinned", phi::GPUPinnedPlace{}}, + // {"xpu", phi::XPUPlace{}}, + // {"ipu", phi::IPUPlace{}}, + // {":", phi::CustomPlace{}}, + // {"undefined", phi::Place{}}}; Attribute PlaceAttr(const std::string& value) const; + // {"NHWC", phi::DataLayout::kNHWC}, + // {"NCHW", phi::DataLayout::kNCHW}, + // {"Undefined", phi::DataLayout::kAnyLayout}, + // {"ONEDNN", 
phi::DataLayout::ONEDNN}, + // {"SPARSE_COO", phi::DataLayout::SPARSE_COO}, + // {"SPARSE_CSR", phi::DataLayout::SPARSE_CSR}, + // {"NDHWC", phi::DataLayout::kNDHWC}, + // {"NCDHW", phi::DataLayout::kNCDHW}, + // {"PSTRING_UNION", phi::DataLayout::PSTRING_UNION}, + // {"STRIDED", phi::DataLayout::STRIDED}}; Attribute DataLayoutAttr(const std::string& value) const; Attribute ComputeAttr(const AttrComputeFunc& attr_compute_func) const; diff --git a/paddle/fluid/pir/drr/src/ir_operation_factory.cc b/paddle/fluid/pir/drr/src/ir_operation_factory.cc index e625db38d1b8f..20a281dd12d36 100644 --- a/paddle/fluid/pir/drr/src/ir_operation_factory.cc +++ b/paddle/fluid/pir/drr/src/ir_operation_factory.cc @@ -15,19 +15,22 @@ #include #include "paddle/common/layout.h" + #include "paddle/fluid/pir/dialect/operator/ir/manual_op.h" +#ifdef PADDLE_WITH_DNNL +#include "paddle/fluid/pir/dialect/operator/ir/onednn_op.h" +#endif #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" #include "paddle/fluid/pir/drr/include/drr_pattern_context.h" #include "paddle/fluid/pir/drr/src/attr_type_uilts.h" #include "paddle/fluid/pir/drr/src/ir_operation_factory.h" -#include "paddle/phi/core/enforce.h" + #include "paddle/pir/include/core/builtin_attribute.h" #include "paddle/pir/include/core/builtin_op.h" #include "paddle/pir/include/core/operation.h" #include "paddle/pir/include/core/value.h" -#ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/pir/dialect/operator/ir/onednn_op.h" -#endif + +#include "paddle/phi/core/enforce.h" namespace paddle { namespace drr { @@ -317,10 +320,11 @@ pir::Attribute CreateIrAttribute(const std::any& obj) { } } -pir::AttributeMap CreateAttributeMap(const OpCall& op_call, - const MatchContextImpl& src_match_ctx) { +pir::AttributeMap CreateAttributeMap( + const std::unordered_map& attrs, + const MatchContextImpl& src_match_ctx) { pir::AttributeMap attr_map; - for (const auto& kv : op_call.attributes()) { + for (const auto& kv : attrs) { std::visit( [&](auto&& arg) { if 
constexpr (std::is_same_v, @@ -339,12 +343,12 @@ pir::AttributeMap CreateAttributeMap(const OpCall& op_call, return attr_map; } -pir::Value GetIrValueByDrrTensor(const Tensor& tensor, +pir::Value GetIrValueByDrrTensor(const Tensor* tensor, const MatchContextImpl& res_match_ctx) { - if (tensor.is_none()) { + if (tensor->is_none()) { return pir::Value{}; } - return res_match_ctx.GetIrValue(tensor.name()); + return res_match_ctx.GetIrValue(tensor->name()); } std::vector GetIrValuesByDrrTensors( @@ -353,16 +357,21 @@ std::vector GetIrValuesByDrrTensors( std::vector ir_values; ir_values.reserve(tensors.size()); for (const auto* tensor : tensors) { - ir_values.push_back(GetIrValueByDrrTensor(*tensor, res_match_ctx)); + ir_values.push_back(GetIrValueByDrrTensor(tensor, res_match_ctx)); } return ir_values; } -void BindIrOutputs(const OpCall& op_call, - pir::Operation* op, - MatchContextImpl* match_ctx) { - for (size_t i = 0; i < op_call.outputs().size(); ++i) { - match_ctx->BindIrValue(op_call.outputs()[i]->name(), op->result(i)); +void BindIrOutputsWithDrrOutputs(const std::vector& tensors, + pir::Operation* op, + MatchContextImpl* match_ctx) { + PADDLE_ENFORCE_LE( + tensors.size(), + op->num_results(), + phi::errors::InvalidArgument( + "The size of drr outputs should less equal the size of pir outputs")); + for (size_t i = 0; i < tensors.size(); ++i) { + match_ctx->BindIrValue(tensors[i]->name(), op->result(i)); } } @@ -371,15 +380,17 @@ pir::Operation* CreateOperation(const OpCall& op_call, pir::PatternRewriter& rewriter, // NOLINT MatchContextImpl* res_match_ctx) { VLOG(6) << "Drr create [" << op_call.name() << "] op..."; - const auto& inputs = op_call.inputs(); - std::vector ir_values = - GetIrValuesByDrrTensors(inputs, *res_match_ctx); pir::Operation* op = OperationFactory::Instance().CreateOperation( op_call.name(), - ir_values, - CreateAttributeMap(op_call, src_match_ctx), + GetIrValuesByDrrTensors(op_call.inputs(), *res_match_ctx), + 
CreateAttributeMap(op_call.attributes(), src_match_ctx), rewriter); - BindIrOutputs(op_call, op, res_match_ctx); + auto runtime_attr_map = + CreateAttributeMap(op_call.runtime_attributes(), src_match_ctx); + for (const auto& kv : runtime_attr_map) { + op->set_attribute(kv.first, kv.second); + } + BindIrOutputsWithDrrOutputs(op_call.outputs(), op, res_match_ctx); VLOG(6) << "Drr create [" << op_call.name() << " @" << op << "] op done."; return op; } diff --git a/paddle/fluid/pir/drr/src/ir_operation_factory.h b/paddle/fluid/pir/drr/src/ir_operation_factory.h index 23095bf9a73e0..eaf3f866ec60c 100644 --- a/paddle/fluid/pir/drr/src/ir_operation_factory.h +++ b/paddle/fluid/pir/drr/src/ir_operation_factory.h @@ -30,13 +30,13 @@ class OperationFactory { return operation_factory; } - using operation_create_fn = + using OperationCreateFunction = std::function&, const pir::AttributeMap&, pir::PatternRewriter&)>; void RegisterOperationCreator(const std::string& op_name, - const operation_create_fn& create_fn) { + const OperationCreateFunction& create_fn) { op_creator_map[op_name] = create_fn; } @@ -76,7 +76,7 @@ class OperationFactory { #ifdef PADDLE_WITH_DNNL void RegisterOnednnOpGeneratedOpCreator(); #endif - std::unordered_map op_creator_map; + std::unordered_map op_creator_map; }; pir::Operation* CreateOperation(const OpCall& op_call, diff --git a/paddle/fluid/pir/drr/src/match_context_impl.h b/paddle/fluid/pir/drr/src/match_context_impl.h index 12a0dc7a65ab5..a9acb5f6ed8df 100644 --- a/paddle/fluid/pir/drr/src/match_context_impl.h +++ b/paddle/fluid/pir/drr/src/match_context_impl.h @@ -37,10 +37,9 @@ class MatchContextImpl final { PADDLE_ENFORCE_NE( tensor_map_.count(tensor_name), 0, - phi::errors::NotFound( - "Not found tensor." - "The Drr tensor [%s] must exist in pattern graph to be obtained.", - tensor_name)); + phi::errors::NotFound("Not found tensor. 
The drr tensor [%s] must " + "exist in pattern graph to be obtained.", + tensor_name)); return tensor_map_.at(tensor_name); } @@ -48,10 +47,10 @@ class MatchContextImpl final { PADDLE_ENFORCE_NE( operation_map_.count(op_call), 0, - phi::errors::NotFound("Not found operation." - "The Drr operation [%s] must exist in the " - "pattern graph to be obtained.", - op_call->name())); + phi::errors::NotFound( + "Not found operation. The drr operation [%s] must exist in the " + "pattern graph to be obtained.", + op_call->name())); return operation_map_.at(op_call); } @@ -65,10 +64,10 @@ class MatchContextImpl final { PADDLE_ENFORCE_NE( iter, tensor_map_.end(), - phi::errors::NotFound("Not found tensor." - "The Drr tensor [%s] is not found in the map, " - "unable to obtain the corresponding IrValue.", - tensor_name)); + phi::errors::NotFound( + "Not found tensor. The drr tensor [%s] is not found in the map, " + "unable to obtain the corresponding IrValue.", + tensor_name)); return iter->second; } @@ -77,10 +76,10 @@ class MatchContextImpl final { PADDLE_ENFORCE_NE( iter, attr_map_.end(), - phi::errors::NotFound("Not found attr." - "The Drr attr [%s] is not found in the map, " - "unable to obtain the corresponding Attribute.", - attr_name)); + phi::errors::NotFound( + "Not found attr. 
The drr attr [%s] is not found in the map, unable " + "to obtain the corresponding Attribute.", + attr_name)); return iter->second; } diff --git a/paddle/fluid/pir/drr/src/pattern_context.cc b/paddle/fluid/pir/drr/src/pattern_context.cc index 7bdee5d5dcafe..fe72170bc9eea 100644 --- a/paddle/fluid/pir/drr/src/pattern_context.cc +++ b/paddle/fluid/pir/drr/src/pattern_context.cc @@ -39,7 +39,7 @@ const Op& DrrPatternContext::SourceOpPattern( const std::string& op_type, const std::unordered_map& attributes) { owned_ops_.push_back(std::shared_ptr( - new drr::Op(op_type, attributes, source_pattern_graph_.get()))); + new drr::Op(op_type, source_pattern_graph_.get(), attributes))); return *owned_ops_.back(); } @@ -50,9 +50,10 @@ drr::Tensor& DrrPatternContext::SourceTensorPattern(const std::string& name) { const Op& DrrPatternContext::ResultOpPattern( const std::string& op_type, - const std::unordered_map& attributes) { - owned_ops_.push_back(std::shared_ptr( - new drr::Op(op_type, attributes, result_pattern_graph_.get()))); + const std::unordered_map& attributes, + const std::unordered_map& runtime_attributes) { + owned_ops_.push_back(std::shared_ptr(new drr::Op( + op_type, result_pattern_graph_.get(), attributes, runtime_attributes))); return *owned_ops_.back(); } @@ -174,8 +175,9 @@ void Tensor::operator=(const Tensor& other) const { // NOLINT const drr::Op& ResultPattern::Op( const std::string& op_type, - const std::unordered_map& attributes) { - return ctx_->ResultOpPattern(op_type, attributes); + const std::unordered_map& attributes, + const std::unordered_map& runtime_attributes) { + return ctx_->ResultOpPattern(op_type, attributes, runtime_attributes); } drr::Tensor& ResultPattern::Tensor(const std::string& name) { diff --git a/paddle/fluid/pir/drr/src/rewrite_pattern.cc b/paddle/fluid/pir/drr/src/rewrite_pattern.cc index a5ea7ad074c9f..6b2c7cab2ba13 100644 --- a/paddle/fluid/pir/drr/src/rewrite_pattern.cc +++ b/paddle/fluid/pir/drr/src/rewrite_pattern.cc @@ -15,6 
+15,7 @@ #include #include +#include "glog/vlog_is_on.h" #include "paddle/fluid/pir/drr/include/drr_pattern_base.h" #include "paddle/fluid/pir/drr/include/drr_rewrite_pattern.h" #include "paddle/fluid/pir/drr/src/ir_operation_factory.h" @@ -43,12 +44,17 @@ DrrRewritePattern::DrrRewritePattern( constraints_(drr_context.constraints()), result_pattern_graph_(drr_context.result_pattern_graph()), drr_pattern_owner_(drr_pattern_owner) { - PADDLE_ENFORCE_NE( - source_pattern_graph_->owned_op_call().empty(), - true, - phi::errors::InvalidArgument("Source pattern graph is empty." - "Suggested fix: Please check the DRR " - "source pattern definition code.")); + PADDLE_ENFORCE_NE(source_pattern_graph_->owned_op_call().empty(), + true, + phi::errors::InvalidArgument( + "Source pattern graph is empty. Suggested fix: please " + "check the drr source pattern definition code.")); + if (VLOG_IS_ON(4)) { + std::cout << "\nThe source pattern graph in [" << pattern_name << "]:\n" + << *source_pattern_graph_ << std::endl; + std::cout << "\nThe result pattern graph in [" << pattern_name << "]:\n" + << *result_pattern_graph_ << std::endl; + } } bool DrrRewritePattern::MatchAndRewrite( @@ -415,9 +421,8 @@ bool DrrRewritePattern::MatchFromOutputToInput( step, source_pattern_graph.CountOfOpCalls(), phi::errors::PreconditionNotMet( - "Pattern matching failed." - "The number of successful matches and the number of OpCalls in the " - "source pattern graph are not equal.")); + "Pattern matching failed. The number of successful matches and the " + "number of OpCalls in the source pattern graph are not equal.")); } else { return matched; } @@ -465,25 +470,25 @@ MatchContextImpl DrrRewritePattern::CreateOperations( PADDLE_ENFORCE_NE( result_pattern_graph.id2owned_tensor().count(in_tensor), 0, - phi::errors::NotFound("Not found the input tensor." 
- "Drr input tensor [%s] must exist in the result " - "pattern graph to be obtained.", - in_tensor)); + phi::errors::NotFound( + "Not found the input tensor. Drr input tensor [%s] must exist in " + "the result pattern graph to be obtained.", + in_tensor)); if (!result_pattern_graph.id2owned_tensor().at(in_tensor)->is_none()) { res_match_ctx.BindIrValue(in_tensor, src_match_ctx.GetIrValue(in_tensor)); } } - std::vector> temp_program; - std::unordered_map op_2_temp_program_index; - for (auto& op : *rewriter.block()) { - op_2_temp_program_index[&op] = temp_program.size(); - temp_program.push_back({&op}); - } - // topo order visit result_pattern_graph GraphTopo graph_topo_visit(&result_pattern_graph); graph_topo_visit.WalkGraphNodesTopoOrder([&](const OpCall& op_call) { + std::vector> temp_program; + std::unordered_map op_2_temp_program_index; + for (auto& op : *rewriter.block()) { + op_2_temp_program_index[&op] = temp_program.size(); + temp_program.push_back({&op}); + } + // set insert point size_t max_input_op_index = 0UL; pir::Operation* max_index_op = nullptr; @@ -530,11 +535,13 @@ MatchContextImpl DrrRewritePattern::CreateOperations( pir::Operation* new_op = CreateOperation(op_call, src_match_ctx, rewriter, &res_match_ctx); - op_2_temp_program_index[new_op] = max_input_op_index + 1; - if (max_input_op_index + 1 >= temp_program.size()) { + + size_t new_max_input_op_index = max_input_op_index + 1; + op_2_temp_program_index[new_op] = new_max_input_op_index; + if (new_max_input_op_index >= temp_program.size()) { temp_program.push_back({}); } - temp_program[max_input_op_index + 1].push_back(new_op); + temp_program[new_max_input_op_index].push_back(new_op); }); return res_match_ctx; diff --git a/paddle/fluid/pir/serialize_deserialize/CMakeLists.txt b/paddle/fluid/pir/serialize_deserialize/CMakeLists.txt new file mode 100644 index 0000000000000..4ab79bd350dc0 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/CMakeLists.txt @@ -0,0 +1,9 @@ +file(GLOB_RECURSE 
SERIALIZE_DESERIALIZE_CPP_SOURCES "*.cc") + +include_directories(pir_save_load PRIVATE + ${PADDLE_SOURCE_DIR}/third_party/nlohmann_json/include/) + +cc_library( + pir_save_load + SRCS ${SERIALIZE_DESERIALIZE_CPP_SOURCES} + DEPS op_dialect phi json) diff --git a/paddle/fluid/pir/serialize_deserialize/include/deserialize_utils.h b/paddle/fluid/pir/serialize_deserialize/include/deserialize_utils.h new file mode 100644 index 0000000000000..1eaa8843033f1 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/include/deserialize_utils.h @@ -0,0 +1,314 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once +#include +#include +#include +#include + +#include "glog/logging.h" +#include "paddle/common/layout.h" +#include "paddle/fluid/framework/data_layout.h" +#include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h" +#include "paddle/fluid/pir/serialize_deserialize/include/schema.h" +#include "paddle/fluid/pir/serialize_deserialize/include/third_part.h" +#include "paddle/phi/common/data_type.h" +#include "paddle/pir/include/core/builtin_attribute.h" +#include "paddle/pir/include/core/builtin_type.h" + +namespace pir { + +template +T deserializeTypeFromJson(Json* type_json, pir::IrContext* ctx) { + return T::get(ctx); +} + +template +T deserializeAttrFromJson(Json* attr_json, pir::IrContext* ctx) { + CPP_T data = attr_json->at(DATA).template get(); + return T::get(ctx, data); +} + +template <> +pir::Complex64Attribute deserializeAttrFromJson( + Json* attr_json, pir::IrContext* ctx) { + Json data_json = attr_json->at(DATA); + phi::dtype::complex data = + phi::dtype::complex(data_json.at(0).template get(), + data_json.at(1).template get()); + return pir::Complex64Attribute::get(ctx, data); +} + +template <> +pir::Complex128Attribute +deserializeAttrFromJson(Json* attr_json, + pir::IrContext* ctx) { + Json data_json = attr_json->at(DATA); + phi::dtype::complex data = + phi::dtype::complex(data_json.at(0).template get(), + data_json.at(1).template get()); + return pir::Complex128Attribute::get(ctx, data); +} + +template <> +paddle::dialect::IntArrayAttribute +deserializeAttrFromJson>(Json* attr_json, + pir::IrContext* ctx) { + std::vector data = attr_json->at(DATA).get>(); + phi::IntArray int_array = phi::IntArray(data); + return paddle::dialect::IntArrayAttribute::get(ctx, int_array); +} + +pir::Attribute deserializeAttrFromJson_scalarAttr(Json* attr_json, + pir::IrContext* ctx) { + Json content = attr_json->at(DATA); + phi::DataType dtype_ = + phi::StringToDataType(content.at(0).template get()); + phi::Scalar scalar; + + if (dtype_ == 
phi::DataType::FLOAT32) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::INT32) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::FLOAT64) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::INT8) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::FLOAT16 || + dtype_ == phi::DataType::UINT16 || + dtype_ == phi::DataType::BFLOAT16) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::INT16) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::INT64) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::UINT8) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::UINT32) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::UINT64) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::BOOL) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::COMPLEX64) { + float scalar_real = content.at(1).template get(); + float scalar_imag = content.at(2).template get(); + phi::dtype::complex data = + phi::dtype::complex(scalar_real, scalar_imag); + scalar = phi::Scalar(data); + } else if (dtype_ == phi::DataType::COMPLEX128) { + double scalar_real = content.at(1).template get(); + double scalar_imag = content.at(2).template get(); + phi::dtype::complex data = + phi::dtype::complex(scalar_real, scalar_imag); + scalar = phi::Scalar(data); + } else { + PADDLE_ENFORCE(false, + phi::errors::InvalidArgument( + "Invalid tensor data type `", dtype_, "`.")); + } + + return paddle::dialect::ScalarAttribute::get(ctx, scalar); +} + +template <> +paddle::dialect::DataTypeAttribute +deserializeAttrFromJson( + Json* attr_json, pir::IrContext*
ctx) { + std::string data = attr_json->at(DATA).template get(); + phi::DataType data_type = phi::StringToDataType(data); + return paddle::dialect::DataTypeAttribute::get(ctx, data_type); +} + +template <> +paddle::dialect::PlaceAttribute +deserializeAttrFromJson( + Json* attr_json, pir::IrContext* ctx) { + Json data_json = attr_json->at(DATA); + int8_t type_id = data_json.at(0).template get(); + phi::AllocationType type = static_cast(type_id); + int8_t id = data_json.at(1).template get(); // int8_t + std::string dev_type = data_json.at(2).template get(); // string + phi::Place place = phi::Place(type, id, dev_type); + return paddle::dialect::PlaceAttribute::get(ctx, place); +} + +pir::Type parseType(Json* type_json) { + auto type_name = type_json->at(ID).template get(); + pir::IrContext* ctx = pir::IrContext::Instance(); + + if (type_name == pir::BoolType::name()) { + VLOG(8) << "Parse BoolType ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::BFloat16Type::name()) { + VLOG(8) << "Parse BFloat16Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Float16Type::name()) { + VLOG(8) << "Parse Float16Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Float32Type::name()) { + VLOG(8) << "Parse Float32Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Float64Type::name()) { + VLOG(8) << "Parse Float64Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Int8Type::name()) { + VLOG(8) << "Parse Int8Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::UInt8Type::name()) { + VLOG(8) << "Parse UInt8Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Int16Type::name()) { + VLOG(8) << "Parse Int16Type ... 
"; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Int32Type::name()) { + VLOG(8) << "Parse Int32Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Int64Type::name()) { + VLOG(8) << "Parse Int64Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::IndexType::name()) { + VLOG(8) << "Parse IndexType ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Complex64Type::name()) { + VLOG(8) << "Parse Complex64Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Complex128Type::name()) { + VLOG(8) << "Parse Complex128Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::VectorType::name()) { + VLOG(8) << "Parse VectorType ... "; + std::vector content; + for (auto& type_x : type_json->at(DATA)) { + content.push_back(parseType(&type_x)); + } + return pir::VectorType::get(ctx, content); + } else if (type_name == pir::DenseTensorType::name()) { + VLOG(8) << "Parse DenseTensorType ... "; + Json data_json = type_json->at(DATA); + pir::Type dtype = parseType(&(data_json.at(0))); + + std::vector dims = + data_json.at(1).template get>(); + phi::DDim ddim = phi::make_ddim(dims); + pir::DataLayout data_layout = + common::StringToDataLayout(data_json.at(2).template get()); + + std::vector> lod = + data_json.at(3).template get>>(); + + size_t offset = data_json.at(4).get(); + return pir::DenseTensorType::get( + ctx, dtype, ddim, data_layout, lod, offset); + } else { + PADDLE_ENFORCE(false, + phi::errors::InvalidArgument( + "Unknown Type %s for parse type", type_name)); + } + VLOG(8) << "Finish Parse Type ... 
"; + + return pir::Type(); +} + +template <> +pir::TypeAttribute deserializeAttrFromJson( + Json* attr_json, pir::IrContext* ctx) { + pir::Type type = parseType(&(attr_json->at(DATA))); + return pir::TypeAttribute::get(ctx, type); +} + +pir::Attribute parseAttr(Json* attr_json) { + std::string attr_name = attr_json->at(ID).template get(); + pir::IrContext* ctx = pir::IrContext::Instance(); + + if (attr_name == pir::BoolAttribute::name()) { + VLOG(8) << "Parse BoolAttribute ."; + return pir::deserializeAttrFromJson(attr_json, + ctx); + } else if (attr_name == pir::FloatAttribute::name()) { + VLOG(8) << "Parse FloatAttribute ."; + return pir::deserializeAttrFromJson(attr_json, + ctx); + } else if (attr_name == pir::DoubleAttribute::name()) { + VLOG(8) << "Parse DoubleAttribute ."; + return pir::deserializeAttrFromJson(attr_json, + ctx); + } else if (attr_name == pir::Int32Attribute::name()) { + VLOG(8) << "Parse Int32Attribute ."; + return pir::deserializeAttrFromJson(attr_json, + ctx); + } else if (attr_name == pir::Int64Attribute::name()) { + VLOG(8) << "Parse Int64Attribute ."; + return pir::deserializeAttrFromJson(attr_json, + ctx); + } else if (attr_name == pir::IndexAttribute::name()) { + VLOG(8) << "Parse IndexAttribute ."; + return pir::deserializeAttrFromJson(attr_json, + ctx); + } else if (attr_name == pir::ArrayAttribute::name()) { + VLOG(8) << "Parse ArrayAttribute ."; + std::vector val; + for (auto& attr_ : attr_json->at(DATA)) { + val.push_back(parseAttr(&(attr_))); + } + return pir::ArrayAttribute::get(ctx, val); + } else if (attr_name == pir::TypeAttribute::name()) { + VLOG(8) << "Parse TypeAttribute ."; + return pir::deserializeAttrFromJson( + attr_json, ctx); + } else if (attr_name == pir::TensorNameAttribute::name()) { + VLOG(8) << "Parse TensorNameAttribute ."; + return pir::deserializeAttrFromJson( + attr_json, ctx); + } else if (attr_name == pir::Complex64Attribute::name()) { + VLOG(8) << "Parse Complex64Attribute ."; + return 
pir::deserializeAttrFromJson( + attr_json, ctx); + } else if (attr_name == pir::Complex128Attribute::name()) { + VLOG(8) << "Parse Complex128Attribute ."; + return pir::deserializeAttrFromJson( + attr_json, ctx); + } else if (attr_name == pir::StrAttribute::name()) { + VLOG(8) << "Parse StrAttribute ."; + return pir::deserializeAttrFromJson( + attr_json, ctx); + } else if (attr_name == paddle::dialect::IntArrayAttribute::name()) { + VLOG(8) << "Parse IntArrayAttribute ."; + return pir::deserializeAttrFromJson>(attr_json, ctx); + } else if (attr_name == paddle::dialect::ScalarAttribute::name()) { + VLOG(8) << "Parse ScalarAttribute ."; + // this func's return type is pir::Attribute which is diffrent + // from paddle::dialect::ScalarAttribute + return pir::deserializeAttrFromJson_scalarAttr(attr_json, ctx); + } else if (attr_name == paddle::dialect::DataTypeAttribute::name()) { + VLOG(8) << "Parse DataTypeAttribute ."; + return pir::deserializeAttrFromJson(attr_json, ctx); + } else if (attr_name == paddle::dialect::PlaceAttribute::name()) { + VLOG(8) << "Parse PlaceAttribute ."; + return pir::deserializeAttrFromJson(attr_json, ctx); + } else { + PADDLE_ENFORCE(false, + phi::errors::InvalidArgument( + "Unknown Attr %s for parse attr", attr_name)); + } + VLOG(8) << "Finish Parse Attr ... "; + + return pir::Attribute(); +} + +} // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/include/interface.h b/paddle/fluid/pir/serialize_deserialize/include/interface.h new file mode 100644 index 0000000000000..3302dc1b90bb7 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/include/interface.h @@ -0,0 +1,63 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "paddle/pir/include/core/program.h" +namespace pir { +/** + * @brief Write the given PIR program into a file at the specified file path. + * + * @param[in] program The PIR program to be written. + * @param[in] file_path The path to the file to be written. + * @param[in] pir_version The version number of PIR, used to identify or verify + * the written program version + * @param[in] overwrite If the file already exists, this flag determines + * whether to overwrite the existing file. + * @param[in] readable (Optional parameter, default to false) If true, the + * generated file will be has indent structure. + * @param[in] trainable (Optional parameter, default to true) If true, + * operation has opresult_attrs for training like stop_gradient,persistable; + * Otherwise, it may only has opinfo attrs. + * + * @return void。 + * + * @note readable and trainable Parameters may affect the content and format of + * the generated file, depending on implementation. + */ +void WriteModule(const pir::Program& program, + const std::string& file_path, + const uint64_t& pir_version, + bool overwrite, + bool readable = false, + bool trainable = true); + +/** + * @brief Gets a PIR program from the specified file path. + * + * @param[in] file_path The path to the file from which the PIR program + * should be read. + * @param[out] program A pointer to the PIR program object where the + * deserilize program will be stored. + * @param[in] pir_version The current version of the PIR program format. + * + * @return Void. 
The function modifies the 'program' object to contain the data + * read from the file. + * + * @note If 'pir_version' is larger than the version of file, will trigger + * version compatibility modification rule. + */ +void ReadModule(const std::string& file_path, + pir::Program* program, + const uint64_t& pir_version); +} // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/include/ir_deserialize.h b/paddle/fluid/pir/serialize_deserialize/include/ir_deserialize.h new file mode 100644 index 0000000000000..2ae9f22d21a9c --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/include/ir_deserialize.h @@ -0,0 +1,52 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include +#include "paddle/common/enforce.h" +#include "paddle/fluid/pir/serialize_deserialize/include/third_part.h" +#include "paddle/pir/include/core/operation.h" +#include "paddle/pir/include/core/program.h" + +namespace pir { + +class ProgramReader { + public: + explicit ProgramReader(const uint64_t version) : current_version(version) {} + + ProgramReader(ProgramReader&&) = delete; + ProgramReader(const ProgramReader& ProgramReader) = delete; + ProgramReader& operator=(const ProgramReader&) = delete; + ProgramReader& operator=(ProgramReader&&); + + // static void staticInit() + + void RecoverProgram(Json* program_json, pir::Program* recover_program); + ~ProgramReader() = default; + + private: + uint64_t current_version; + std::map id_value_map; + + void ReadProgram(Json* program_json, pir::Program* program); + void ReadRegion(Json* region_json, pir::Region* region); + void ReadBlock(Json* block_json, pir::Block* block); + pir::Operation* ReadOp(Json* op_json); + pir::AttributeMap ReadAttributesMap(Json* attrs_json, + Json* operesult_attrs_json); + pir::Attribute ReadAttribute(Json* attr_json); + pir::Type ReadType(Json* type_json); +}; + +} // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/include/ir_serialize.h b/paddle/fluid/pir/serialize_deserialize/include/ir_serialize.h new file mode 100644 index 0000000000000..de8c7be16c5d6 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/include/ir_serialize.h @@ -0,0 +1,81 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "paddle/fluid/pir/serialize_deserialize/include/third_part.h" +#include "paddle/pir/include/core/program.h" + +namespace pir { +/** + * ProgramWriter is used to serialize pir program to json object. + * + */ + +class ProgramWriter { + public: + explicit ProgramWriter(const uint64_t version) : version_(version) {} + explicit ProgramWriter(const uint64_t version, const bool trainable) + : version_(version), trainable_(trainable) {} + + ProgramWriter(ProgramWriter&&) = delete; + ProgramWriter(const ProgramWriter& ProgramWriter) = delete; + ProgramWriter& operator=(const ProgramWriter&) = delete; + ProgramWriter& operator=(ProgramWriter&&); + + /** GetProgramJson is used by writeModulde api*/ + Json GetProgramJson(const pir::Program* program); + ~ProgramWriter() = default; + + private: + /** version_ is the version of paddlepaddle. which is used to + * Conduct version compatibility judgment and modification.*/ + uint64_t version_; + + /** program_json is the json object of pir program. */ + Json program_json; + + /** value_id_map is used to record the serialize id of pir::Value. + * which is used to serilize op's operands. 
*/ + std::map value_id_map; + + /** xxx_id_ is used to record current id of IR structure + * which should be serialized.*/ + + int64_t region_id_ = 0; + int64_t block_id_ = 0; + int64_t value_id_ = 1; + int64_t blockarg_id_ = -1; + + bool trainable_ = true; + + Json WriteProgram(const pir::Program* program); + Json WriteRegion(const pir::Region* region, const std::string& region_name); + Json WriteBlock(const pir::Block* block, const std::string& block_name); + Json WriteOp(const pir::Operation& op); + Json WriteBlockArg(const pir::Value& value); + Json WriteValue(const pir::Value& value); + Json WriteOpOperand(const pir::OpOperand& op_operand); + Json WriteAttributesMapOpinfo(pir::Operation* op, + const AttributeMap& attr_map); + Json WriteAttributesMapOther(const AttributeMap& attr_map); + /** WriteAttribute is used to write attribute of op. + * which call writeAttr to get Derived Class‘s json object. + * same as WriteType + */ + + Json WriteAttribute(const std::string& op_attr_name, + const pir::Attribute& attr); + Json WriteType(const pir::Type& type); +}; + +} // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/include/save_load_parameters.h b/paddle/fluid/pir/serialize_deserialize/include/save_load_parameters.h new file mode 100644 index 0000000000000..5ebbafb1eb4f7 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/include/save_load_parameters.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once + +#include + +#include "paddle/phi/core/dense_tensor.h" + +namespace pir { + +void SaveFunction(const phi::DenseTensor& x, + const std::string& name, + const std::string& file_path, + bool overwrite, + bool save_as_fp16); + +void SaveCombineFunction(const std::vector& x, + const std::vector& names, + const std::string& file_path, + bool overwrite, + bool save_as_fp16, + bool save_to_memory); + +void LoadFunction(const std::string& file_path, + int64_t seek, + const std::vector& shape, + bool load_as_fp16, + phi::DenseTensor* out); + +void LoadCombineFunction(const std::string& file_path, + const std::vector& names, + std::vector* out, + bool load_as_fp16); +} // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/include/schema.h b/paddle/fluid/pir/serialize_deserialize/include/schema.h new file mode 100644 index 0000000000000..75973b99ca049 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/include/schema.h @@ -0,0 +1,65 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +namespace pir { +/** + * IMPORTANT!!! + * all those defining strings can't be changed, otherwise the deserialization + * will failed. 
define all keys in serialized files to ensure accuracy for + * deserialization make sure all the key mutually exclusive + */ + +// all IR structure's identifier (region, block, op, attr, type value etc) +// which can be string , int64_t etc. +#define ID "id" + +// program's key: +#define REGIONS "regions" + +// region's key: +// which is json array with block json object(ID and BLOCKARGS and BLOCKOPS) +#define BLOCKS "blocks" + +// block's key: +// which is json array with value json object +#define BLOCKARGS "args" +// which is json array with operation json object +#define BLOCKOPS "ops" + +// operation's key: +// which is json array with opoperand json object(ID) +#define OPOPERANDS "I" + +// which is json array with value json object(ID and TYPE_TYPE) +#define OPRESULTS "O" + +// which is json array with json object(NAME and ATTR_TYPE) +#define ATTRS "A" +#define OPRESULTS_ATTRS "OA" + +// value's key: +// value's type which should be pir::Type's json object(ID or ID and DATA). +#define TYPE_TYPE "TT" + +// attr's name which is operation's feature. +#define NAME "N" + +// attr's value which is pir::Attribute's json object(ID and DATA). +#define ATTR_TYPE "AT" + +// type/attr's contents which is json::array. +#define DATA "D" + +} // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/include/serialize_utils.h b/paddle/fluid/pir/serialize_deserialize/include/serialize_utils.h new file mode 100644 index 0000000000000..0b75579e080b3 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/include/serialize_utils.h @@ -0,0 +1,343 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include + +#include "glog/logging.h" + +#include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h" +#include "paddle/fluid/pir/serialize_deserialize/include/schema.h" +#include "paddle/fluid/pir/serialize_deserialize/include/third_part.h" +#include "paddle/phi/common/data_type.h" +#include "paddle/pir/include/core/builtin_attribute.h" +#include "paddle/pir/include/core/builtin_type.h" + +namespace pir { +/** serializeTypeToJson is a template function to serialize + * a pir type to a json object. a pir type may have value or no value + * Value free types only have ID, while value based types have + * DATA in addition to ID. + * + * If a new pir type is added, which needs to be serialized, + * it must have a name() method, returning a string which + * should be different from other types' names. + * (The name template is t_dialectname_typename). + * Note: The prefixes t are assumed to represent 'type'. + * + * If the pir type has value, it should have a data() method, + * which returns the value of type. The data() method is better + * suited to return TYPE which supported by json like std::vector, + * std::string, int, float and so on. if not, serailizeTypeToJson + * need to be specialized. + */ + +template +Json serializeTypeToJson(const T& type) { + Json json_obj; + json_obj[ID] = type.name(); + return json_obj; +} + +/** serializeAttrToJson is a template function to serialize + * pir attribute to json object. pir attribute usually have + * value, so it's json object has DATA and ID. 
+ * + * If a new pir attr is added, which needs to be serialized, + * it must have a name() method, returning a string which + * should be different from other types' names. + * (The name template is a_dialectname_typename). + * Note: The prefixes a are assumed to represent 'attribute'. + * + * It also need have a data() method, which returns the value of + * attribute. The data() method is better suited to return TYPE + * which supported by json like std::vector, std::string, int, + * float and so on. if not, serailizeAttrToJson + * need to be specialized. + */ + +template +Json serializeAttrToJson(const T& attr) { + Json json_obj; + json_obj[ID] = attr.name(); + json_obj[DATA] = attr.data(); + return json_obj; +} + +#define SERIALIZE_ATTR_TO_JSON(type, data) \ + template <> \ + Json serializeAttrToJson(const type& attr) { \ + Json json_obj; \ + json_obj[ID] = attr.name(); \ + json_obj[DATA] = data; \ + return json_obj; \ + } + +SERIALIZE_ATTR_TO_JSON(pir::StrAttribute, attr.AsString()); + +SERIALIZE_ATTR_TO_JSON(pir::Complex64Attribute, + std::vector({attr.data().real, attr.data().imag})); +SERIALIZE_ATTR_TO_JSON(pir::Complex128Attribute, + std::vector({attr.data().real, attr.data().imag})); +SERIALIZE_ATTR_TO_JSON(paddle::dialect::IntArrayAttribute, + attr.data().GetData()); +SERIALIZE_ATTR_TO_JSON(paddle::dialect::DataTypeAttribute, + phi::DataTypeToString(attr.data())); + +template <> +Json serializeAttrToJson( + const paddle::dialect::ScalarAttribute& attr) { + Json json_obj; + json_obj[ID] = attr.name(); + + Json content = Json::array(); + auto scalar = attr.data(); + auto dtype_ = scalar.dtype(); + content.push_back(DataTypeToString(dtype_)); + + if (dtype_ == phi::DataType::FLOAT32) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::INT32) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::FLOAT64) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::INT8) { + 
content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::FLOAT16 || + dtype_ == phi::DataType::UINT16 || + dtype_ == phi::DataType::BFLOAT16) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::INT16) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::INT64) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::UINT8) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::UINT32) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::UINT64) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::BOOL) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::COMPLEX64) { + content.push_back(scalar.to>().real); + content.push_back(scalar.to>().imag); + } else if (dtype_ == phi::DataType::COMPLEX128) { + content.push_back(scalar.to>().real); + content.push_back(scalar.to>().imag); + } else { + PADDLE_THROW(common::errors::InvalidArgument( + "Invalid tensor data type `", dtype_, "`.")); + } + json_obj[DATA] = content; + return json_obj; +} + +template <> +Json serializeAttrToJson( + const paddle::dialect::PlaceAttribute& attr) { + Json json_obj; + json_obj[ID] = attr.name(); + Json content = Json::array(); + auto place = attr.data(); + content.push_back(static_cast(place.GetType())); + content.push_back(place.GetDeviceId()); // int8_t + content.push_back(place.GetDeviceType()); // string + json_obj[DATA] = content; + return json_obj; +} + +Json writeType(const pir::Type& type) { + Json type_json = Json::object(); + if (type.isa()) { + VLOG(8) << "Write BoolType ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write BFloat16Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write Float16Type ... 
"; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write Float32Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write Float64Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write Int8Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write UInt8Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write Int16Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write Int32Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write Int64Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write IndexType ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write Complex64Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write Complex128Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + // NOTE(Ruting) those Types need call writeType which make build error + // when use template func serializeTypeToJson + } else if (type.isa()) { + VLOG(8) << "Write VectorType ... "; + auto type_ = type.dyn_cast(); + type_json[ID] = type_.name(); + Json content = Json::array(); + for (auto type_x : type_.data()) { + content.push_back(writeType(type_x)); + } + type_json[DATA] = content; + return type_json; + } else if (type.isa()) { + VLOG(8) << "Write DenseTensorType ... 
"; + auto type_ = type.dyn_cast(); + + type_json[ID] = type_.name(); + Json content = Json::array(); + content.push_back(writeType(type_.dtype())); + + std::vector dims_; + for (auto i = 0; i < type_.dims().size(); i++) { + dims_.push_back(type_.dims().at(i)); + } + content.push_back(dims_); + + content.push_back(DataLayoutToString(type_.data_layout())); + + content.push_back(type_.lod()); + + content.push_back(type_.offset()); + type_json[DATA] = content; + return type_json; + } else { + PADDLE_ENFORCE( + false, phi::errors::InvalidArgument("Unknown Type when write type")); + } + VLOG(8) << "Finish write Type ... "; + + return type_json; +} + +SERIALIZE_ATTR_TO_JSON(pir::TypeAttribute, writeType(attr.data())); + +Json writeAttr(const pir::Attribute& attr) { + Json attr_json = Json::object(); + if (attr.isa()) { + VLOG(8) << "write BoolAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write FloatAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write DoubleAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write Int32Attribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write Int64Attribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write IndexAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write ArrayAttribute ."; + auto attr_ = attr.dyn_cast(); + Json val = Json::array(); + for (size_t i = 0; i < attr_.size(); i++) { + val.push_back(writeAttr(attr_.at(i))); + } + attr_json[ID] = attr_.name(); + attr_json[DATA] = val; + return attr_json; + } else if (attr.isa()) { + VLOG(8) << "write TypeAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write 
TensorNameAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write Complex64Attribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write Complex128Attribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write StrAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write IntArrayAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write ScalarAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write DataTypeAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write PlaceAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else { + PADDLE_ENFORCE( + false, phi::errors::InvalidArgument("Unknown Attr %s when write attr")); + } + VLOG(8) << "Finish write& attr ... "; + + return attr_json; +} + +} // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/include/third_part.h b/paddle/fluid/pir/serialize_deserialize/include/third_part.h new file mode 100644 index 0000000000000..bfa5146336902 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/include/third_part.h @@ -0,0 +1,17 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "nlohmann/json.hpp" +using Json = nlohmann::json; diff --git a/paddle/fluid/pir/serialize_deserialize/src/interface.cc b/paddle/fluid/pir/serialize_deserialize/src/interface.cc new file mode 100644 index 0000000000000..60160647c1f7a --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/src/interface.cc @@ -0,0 +1,76 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/pir/serialize_deserialize/include/interface.h" +#include "paddle/common/enforce.h" +#include "paddle/fluid/pir/serialize_deserialize/include/ir_deserialize.h" +#include "paddle/fluid/pir/serialize_deserialize/include/ir_serialize.h" +#include "paddle/phi/common/port.h" + +namespace pir { +#define PROGRAM "program" +#define BASE_CODE "base_code" +#define MAGIC "magic" +#define PIRVERSION "version" +#define PIR "pir" +void WriteModule(const pir::Program& program, + const std::string& file_path, + const uint64_t& pir_version, + bool overwrite, + bool readable, + bool trainable) { + PADDLE_ENFORCE_EQ( + FileExists(file_path) && !overwrite, + false, + common::errors::PreconditionNotMet( + "%s exists!, cannot save to it when overwrite is set to false.", + file_path, + overwrite)); + + // write base code + Json total; + + total[BASE_CODE] = {{MAGIC, PIR}, {PIRVERSION, pir_version}}; + + ProgramWriter writer(pir_version, trainable); + // write program + total[PROGRAM] = writer.GetProgramJson(&program); + std::string total_str; + if (readable) { + total_str = total.dump(4); + } else { + total_str = total.dump(); + } + + MkDirRecursively(DirName(file_path).c_str()); + std::ofstream fout(file_path, std::ios::binary); + PADDLE_ENFORCE_EQ(static_cast(fout), + true, + common::errors::Unavailable( + "Cannot open %s to save variables.", file_path)); + fout << total_str; + fout.close(); +} + +void ReadModule(const std::string& file_path, + pir::Program* program, + const uint64_t& pir_version) { + std::ifstream f(file_path); + Json data = Json::parse(f); + + ProgramReader reader(pir_version); + reader.RecoverProgram(&(data[PROGRAM]), program); +} + +} // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/src/ir_deserialize.cc b/paddle/fluid/pir/serialize_deserialize/src/ir_deserialize.cc new file mode 100644 index 0000000000000..12f46f33604c3 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/src/ir_deserialize.cc @@ -0,0 +1,159 @@ 
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/pir/serialize_deserialize/include/ir_deserialize.h" +#include "paddle/fluid/pir/serialize_deserialize/include/deserialize_utils.h" +namespace pir { +void ProgramReader::RecoverProgram(Json* program_json, + pir::Program* recover_program) { + ReadProgram(program_json, recover_program); + VLOG(6) << "Finish json to program."; + return; +} +void ProgramReader::ReadProgram(Json* program_json, pir::Program* program) { + auto top_level_op = program->module_op(); + PADDLE_ENFORCE_EQ( + program_json->at(REGIONS).size(), + 1, + common::errors::InvalidArgument( + "The redions size of program module should be 1 but got %d.", + program_json->at(REGIONS).size())); + auto& region_json = program_json->at(REGIONS).at(0); + auto& block_json = region_json.at(BLOCKS).at(0); + auto& block = top_level_op.block(); + ReadBlock(&block_json, &block); + + VLOG(6) << "Finish Read program."; + return; +} + +void ProgramReader::ReadRegion(Json* region_json, pir::Region* region) { + auto region_name = region_json->at(ID).template get(); + for (auto& block_json : region_json->at(BLOCKS)) { + auto& block = region->emplace_back(); + ReadBlock(&block_json, &block); + } + VLOG(6) << "Finish Read " << region_name; + return; +} + +void ProgramReader::ReadBlock(Json* block_json, pir::Block* block) { + auto block_name = block_json->at(ID).template get(); + + 
Json& args_json = block_json->at(BLOCKARGS); + if (!args_json.empty()) { + for (auto& arg_json : args_json) { + int64_t arg_id_ = arg_json.at(ID).template get(); + auto value = block->AddArg(ReadType(&(arg_json.at(TYPE_TYPE)))); + id_value_map[arg_id_] = value; + VLOG(6) << "Finish Read blockargument " << arg_id_; + } + } + + Json& ops_json = block_json->at(BLOCKOPS); + if (!ops_json.empty()) { + for (auto& op_json : ops_json) { + block->push_back(ReadOp(&op_json)); + } + } + + VLOG(6) << "Finish Read " << block_name; + return; +} + +pir::Operation* ProgramReader::ReadOp(Json* op_json) { + auto op_name = op_json->at(ID).template get(); + + // deserialize opoperands (find value) + Json& operands_json = op_json->at(OPOPERANDS); + std::vector inputs; + for (auto& operand_json : operands_json) { + int64_t id = operand_json.at(ID).template get(); + inputs.push_back(id_value_map[id]); + } + + // deserialize opresults (find type) + Json& opresults_json = op_json->at(OPRESULTS); + std::vector output_types; + std::vector output_ids; + for (auto& opresult_json : opresults_json) { + int64_t value_id_ = opresult_json.at(ID).template get(); + output_ids.push_back(value_id_); + output_types.push_back(ReadType(&(opresult_json.at(TYPE_TYPE)))); + VLOG(6) << "Finish Read value " << value_id_; + } + + // serialize necessary attributes + Json& attrs_json = op_json->at(ATTRS); + + pir::AttributeMap attributes; + if (op_json->contains(OPRESULTS_ATTRS)) { + Json& opresults_attrs_json = op_json->at(OPRESULTS_ATTRS); + attributes = ReadAttributesMap(&attrs_json, &opresults_attrs_json); + } else { + Json empty_json = Json::array(); + attributes = ReadAttributesMap(&attrs_json, &empty_json); + } + + pir::IrContext* ctx_ = pir::IrContext::Instance(); + // prepare opinfo + pir::OpInfo op_info = ctx_->GetRegisteredOpInfo(op_name); + + // deserialize op + pir::Operation* op = + Operation::Create(inputs, attributes, output_types, op_info); + + PADDLE_ENFORCE_EQ( + output_ids.size(), + 
static_cast(op->num_results()), + common::errors::InvalidArgument( + "deserialized op has %d results, but the original op has %d results.", + op->num_results(), + output_ids.size())); + + for (uint32_t i = 0; i < op->num_results(); i++) { + id_value_map[output_ids[i]] = op->result(i); + } + + VLOG(6) << "Finish Read Operation " << op->name(); + return op; +} + +pir::AttributeMap ProgramReader::ReadAttributesMap(Json* attrs_json, + Json* opresult_attrs_json) { + pir::AttributeMap attributes; + for (auto& attr_json : *attrs_json) { + auto attr_name = attr_json.at(NAME).template get(); + attributes.insert({attr_name, ReadAttribute(&attr_json)}); + } + VLOG(6) << "Finish Read pir::AttributeMap "; + for (auto& attr_json : *opresult_attrs_json) { + auto attr_name = attr_json.at(NAME).template get(); + attributes.insert({attr_name, ReadAttribute(&attr_json)}); + } + VLOG(6) << "Finish Read Opresults_AttributeMap "; + return attributes; +} + +pir::Attribute ProgramReader::ReadAttribute(Json* attr_json) { + VLOG(6) << "Begin Read Attribute. "; + return pir::parseAttr(&attr_json->at(ATTR_TYPE)); +} + +pir::Type ProgramReader::ReadType(Json* type_json) { + VLOG(6) << "Begin Read Type. "; + return pir::parseType(type_json); +} + +} // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/src/ir_serialize.cc b/paddle/fluid/pir/serialize_deserialize/src/ir_serialize.cc new file mode 100644 index 0000000000000..7af995074461b --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/src/ir_serialize.cc @@ -0,0 +1,205 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/pir/serialize_deserialize/include/ir_serialize.h" +#include "paddle/fluid/pir/dialect/operator/interface/op_yaml_info.h" +#include "paddle/fluid/pir/serialize_deserialize/include/serialize_utils.h" +#include "paddle/pir/include/core/dialect.h" +#include "paddle/pir/include/core/operation.h" + +namespace pir { + +Json ProgramWriter::GetProgramJson(const pir::Program* program) { + program_json = WriteProgram(program); + VLOG(6) << "Finish program to json."; + return program_json; +} + +Json ProgramWriter::WriteProgram(const pir::Program* program) { + Json program_json; + program_json[REGIONS] = Json::array(); + auto top_level_op = program->module_op(); + + for (size_t i = 0; i < top_level_op->num_regions(); ++i) { + std::string region_name = "region_" + std::to_string(region_id_++); + auto& region = top_level_op->region(i); + auto region_json = WriteRegion(®ion, region_name); + program_json[REGIONS].emplace_back(region_json); + } + VLOG(6) << "Finish write program."; + return program_json; +} + +Json ProgramWriter::WriteRegion(const pir::Region* region, + const std::string& region_name) { + Json region_json; + region_json[ID] = region_name; + region_json[BLOCKS] = Json::array(); + for (auto block : region->blocks()) { + std::string block_name = "block_" + std::to_string(block_id_++); + auto block_json = WriteBlock(block, block_name); + region_json[BLOCKS].emplace_back(block_json); + } + VLOG(6) << "Finish write " << region_name; + return region_json; +} + +Json ProgramWriter::WriteBlock(const pir::Block* block, + const 
std::string& block_name) { + Json block_json; + block_json[ID] = block_name; + + Json args_json = Json::array(); + for (auto arg : block->args()) { + auto arg_json = WriteBlockArg(arg); + args_json.emplace_back(arg_json); + } + block_json[BLOCKARGS] = args_json; + + Json ops_json = Json::array(); + for (auto op : block->ops()) { + auto op_json = WriteOp(*op); + ops_json.emplace_back(op_json); + } + block_json[BLOCKOPS] = ops_json; + + VLOG(6) << "Finish write " << block_name; + return block_json; +} + +Json ProgramWriter::WriteBlockArg(const pir::Value& value) { + Json arg_json; + Json var = WriteType(value.type()); + value_id_map[value] = blockarg_id_; + arg_json[ID] = blockarg_id_; + arg_json[TYPE_TYPE] = var; + + VLOG(6) << "Finish write blockargument " << blockarg_id_; + blockarg_id_--; + + return arg_json; +} + +Json ProgramWriter::WriteValue(const pir::Value& value) { + Json var_json; + // Json var = value; + Json var = WriteType(value.type()); + value_id_map[value] = value_id_; + var_json[ID] = value_id_; + var_json[TYPE_TYPE] = var; + VLOG(6) << "Finish write value " << value_id_; + + value_id_++; + return var_json; +} + +Json ProgramWriter::WriteOp(const pir::Operation& op) { + Json op_json = Json::object(); + op_json[ID] = op.name(); + // serialize opoperands + Json operands_json = Json::array(); + for (auto operand : op.operands()) { + auto operand_json = WriteOpOperand(operand); + operands_json.emplace_back(operand_json); + } + op_json[OPOPERANDS] = operands_json; + + // serialize opresults + Json opresults_json = Json::array(); + for (auto& opresult : op.results()) { + auto opresult_json = WriteValue(opresult); + opresults_json.emplace_back(opresult_json); + } + op_json[OPRESULTS] = opresults_json; + + // serialize attributes + op_json[ATTRS] = WriteAttributesMapOpinfo(const_cast(&op), + op.attributes()); + if (trainable_) { + op_json[OPRESULTS_ATTRS] = WriteAttributesMapOther(op.attributes()); + } + + VLOG(6) << "Finish write Operation " << op.name(); 
+ return op_json; +} + +Json ProgramWriter::WriteOpOperand(const pir::OpOperand& op_operand) { + Json operand_json = Json::object(); + int64_t id = value_id_map[op_operand.source()]; + operand_json[ID] = id; + VLOG(6) << "Finish write OpOperand " << id; + return operand_json; +} + +Json ProgramWriter::WriteAttributesMapOpinfo(pir::Operation* op, + const AttributeMap& attr_map) { + Json attrs_json = Json::array(); + + if (op->dialect()->name() == "pd_op") { + if (op->dyn_cast() != nullptr) { + auto [_1, attr_info, _3, _4, _5] = + op->dyn_cast().GetOpInfo(); + if (attr_info.size() != 0) { + for (auto it = attr_info.begin(); it != attr_info.end(); it++) { + if (attr_map.find(it->name) != attr_map.end()) { + attrs_json.emplace_back( + WriteAttribute(it->name, attr_map.at(it->name))); + } + } + } + } else { + PADDLE_THROW(common::errors::InvalidArgument( + "the %s do not has OpYamlInfoInterface", op->name())); + } + } else { + for (auto& attr : attr_map) { + if (attr.first != "stop_gradient" && attr.first != "persistable" && + attr.first != "op_callstack") { + attrs_json.emplace_back(WriteAttribute(attr.first, attr.second)); + } + } + } + + VLOG(6) << "Finish write Opinfo AttributeMap "; + return attrs_json; +} + +Json ProgramWriter::WriteAttributesMapOther(const AttributeMap& attr_map) { + Json operesult_attrs_json = Json::array(); + for (auto& attr : attr_map) { + if (attr.first == "stop_gradient" || attr.first == "persistable") { + operesult_attrs_json.emplace_back( + WriteAttribute(attr.first, attr.second)); + } + } + + VLOG(6) << "Finish write Other AttributeMap "; + return operesult_attrs_json; +} + +Json ProgramWriter::WriteAttribute(const std::string& op_attr_name, + const pir::Attribute& attr) { + Json attr_json; + attr_json[NAME] = op_attr_name; + attr_json[ATTR_TYPE] = pir::writeAttr(attr); + + VLOG(6) << "Finish write Attribute. "; + return attr_json; +} + +Json ProgramWriter::WriteType(const pir::Type& type) { + VLOG(6) << "Finish write Type. 
"; + return pir::writeType(type); +} +} // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/src/save_load_parameters.cc b/paddle/fluid/pir/serialize_deserialize/src/save_load_parameters.cc new file mode 100644 index 0000000000000..4ae1a01ffbfa5 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/src/save_load_parameters.cc @@ -0,0 +1,146 @@ +/* Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/pir/serialize_deserialize/include/save_load_parameters.h" + +#include +#include +#include + +#include "glog/logging.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/phi/common/port.h" + +namespace pir { + +void SaveFunction(const phi::DenseTensor& x, + const std::string& name, + const std::string& file_path, + bool overwrite, + bool save_as_fp16) { + PADDLE_ENFORCE_EQ( + FileExists(file_path) && !overwrite, + false, + phi::errors::PreconditionNotMet( + "%s exists!, cannot save to it when overwrite is set to false.", + file_path, + overwrite)); + + MkDirRecursively(DirName(file_path).c_str()); + VLOG(6) << "save func save path: " << file_path; + std::ofstream fout(file_path, std::ios::binary); + PADDLE_ENFORCE_EQ( + static_cast(fout), + true, + phi::errors::Unavailable("Cannot open %s to save variables.", file_path)); + + paddle::framework::SerializeToStream(fout, x); + // TODO(changeyoung98): fp16 + fout.close(); + VLOG(6) << "save func done "; +} + +void 
SaveCombineFunction(const std::vector& x, + const std::vector& names, + const std::string& file_path, + bool overwrite, + bool save_as_fp16, + bool save_to_memory) { + PADDLE_ENFORCE_EQ( + FileExists(file_path) && !overwrite, + false, + phi::errors::PreconditionNotMet( + "%s exists!, cannot save to it when overwrite is set to false.", + file_path, + overwrite)); + + MkDirRecursively(DirName(file_path).c_str()); + VLOG(6) << "save func save path: " << file_path; + std::ostringstream ss; + for (size_t i = 0; i < x.size(); i++) { + auto& tensor = *(x[i]); + PADDLE_ENFORCE_EQ( + tensor.IsInitialized(), + true, + phi::errors::InvalidArgument( + "The Tensor with Index (%d) to be saved is not initialized.", i)); + // TODO(changeyoung98): fp16 + paddle::framework::SerializeToStream(ss, tensor); + } + MkDirRecursively(DirName(file_path).c_str()); + std::ofstream fout(file_path, std::ios::binary); + PADDLE_ENFORCE_EQ( + static_cast(fout), + true, + phi::errors::Unavailable("Cannot open %s to save variables.", file_path)); + fout << ss.str(); + fout.close(); + VLOG(6) << "save combine done "; +} + +void LoadFunction(const std::string& file_path, + int64_t seek, + const std::vector& shape, + bool load_as_fp16, + phi::DenseTensor* out) { + std::ifstream fin(file_path, std::ios::binary); + PADDLE_ENFORCE_EQ(static_cast(fin), + true, + phi::errors::Unavailable( + "Load operator fail to open file %s, please check " + "whether the model file is complete or damaged.", + file_path)); + PADDLE_ENFORCE_NOT_NULL(out, + phi::errors::InvalidArgument( + "The variable to be loaded cannot be found.")); + + if (seek != -1) { + PADDLE_ENFORCE_GE(seek, + 0, + phi::errors::InvalidArgument( + "seek with tensor must great than or equal to 0")); + paddle::platform::DeviceContextPool& pool = + paddle::platform::DeviceContextPool::Instance(); + const paddle::platform::DeviceContext* dev_ctx = nullptr; + dev_ctx = pool.Get(paddle::platform::CPUPlace()); + paddle::framework::DeserializeFromStream(fin, 
out, *dev_ctx, seek, shape); + } else { + paddle::framework::DeserializeFromStream(fin, out); + } + + // TODO(changeyoung98): fp16 +} + +void LoadCombineFunction(const std::string& file_path, + const std::vector& names, + std::vector* out, + bool load_as_fp16) { + std::ifstream fin(file_path, std::ios::binary); + PADDLE_ENFORCE_EQ(static_cast(fin), + true, + phi::errors::Unavailable( + "Load operator fail to open file %s, please check " + "whether the model file is complete or damaged.", + file_path)); + for (size_t i = 0; i < names.size(); i++) { + auto tensor = out->at(i); + paddle::framework::DeserializeFromStream(fin, tensor); + } + fin.peek(); + PADDLE_ENFORCE_EQ( + fin.eof(), + true, + phi::errors::Unavailable("Not allowed to load partial data via " + "load_combine_op, please use load_op instead.")); + // TODO(changeyoung98): fp16 +} + +} // namespace pir diff --git a/paddle/fluid/pir/transforms/CMakeLists.txt b/paddle/fluid/pir/transforms/CMakeLists.txt index 627fcb78d8563..3a06aa2da7d77 100644 --- a/paddle/fluid/pir/transforms/CMakeLists.txt +++ b/paddle/fluid/pir/transforms/CMakeLists.txt @@ -6,7 +6,7 @@ if(NOT WITH_CINN) ${CMAKE_CURRENT_SOURCE_DIR}/sub_graph_detector.cc) endif() -if(NOT WITH_MKLDNN) +if(NOT WITH_ONEDNN) file(GLOB_RECURSE onednn_srcs "onednn/*.cc") list(REMOVE_ITEM transforms_srcs ${onednn_srcs}) endif() @@ -26,7 +26,7 @@ set(transforms_deps device_event_base) if(WITH_CINN) - set(transforms_deps ${transforms_deps} cinn_op_dialect cinnapi) + set(transforms_deps ${transforms_deps} cinnapi) endif() cc_library( diff --git a/paddle/fluid/pir/transforms/general/constant_folding_pass.cc b/paddle/fluid/pir/transforms/general/constant_folding_pass.cc index bf1bc26850c56..e70039be7d375 100644 --- a/paddle/fluid/pir/transforms/general/constant_folding_pass.cc +++ b/paddle/fluid/pir/transforms/general/constant_folding_pass.cc @@ -238,7 +238,11 @@ class ConstantFoldingPattern : public pir::RewritePattern { const std::vector>& use_ops) const { for (auto 
[use_op, idx] : use_ops) { if (use_op->isa()) { - if (!ReplaceResultByParameterOp(use_op)) return false; + if (!ReplaceResultByParameterOp(use_op)) { + return false; + } + } else if (use_op->isa()) { + return false; } else if (use_op->HasInterface()) { auto [input_infos, _1, _2, _3, _4] = use_op->dyn_cast() @@ -255,6 +259,9 @@ class ConstantFoldingPattern : public pir::RewritePattern { } bool ReplaceResultByParameterOp(pir::Operation* op) const { + if (op->isa()) { + return false; + } for (uint32_t i = 0; i < op->num_results(); i++) { auto use_ops = pir::GetUseOpsForOutput(op, i); if (!CheckUseOps(use_ops)) return false; diff --git a/paddle/fluid/pir/transforms/gpu/add_norm_fuse_pass.cc b/paddle/fluid/pir/transforms/gpu/add_norm_fuse_pass.cc index 619b9eeb3ec17..bf0c758ef3530 100644 --- a/paddle/fluid/pir/transforms/gpu/add_norm_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/gpu/add_norm_fuse_pass.cc @@ -235,6 +235,18 @@ class AddLayerNormFusePattern : public paddle::drr::DrrPatternBase { } paddle::drr::ResultPattern res = pat.ResultPattern(); + const auto &cast_op_dtype = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> phi::DataType { + auto x_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("x")); + return paddle::dialect::TransToPhiDataType(x_dtype); + }); + const auto &cast_op_1 = + res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}}); + res.Tensor("casted_bias") = cast_op_1(res.Tensor("bias")); + const auto &cast_op_2 = + res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}}); + res.Tensor("casted_w") = cast_op_2(res.Tensor("w")); + const auto &fuse_layer_norm = res.Op(paddle::dialect::FusedBiasResidualLayernormOp::name(), {{"epsilon", pat.Attr("epsilon")}, @@ -248,9 +260,9 @@ class AddLayerNormFusePattern : public paddle::drr::DrrPatternBase { fuse_layer_norm( { &res.Tensor("x"), - &res.Tensor("bias"), + &res.Tensor("casted_bias"), &res.Tensor("residual"), - &res.Tensor("w"), + &res.Tensor("casted_w"), 
&res.InputNoneTensor(), }, {&res.Tensor("layer_norm_out"), diff --git a/paddle/fluid/pir/transforms/gpu/conv2d_add_fuse_pass.cc b/paddle/fluid/pir/transforms/gpu/conv2d_add_fuse_pass.cc index dfd2b0ed588e2..09ecf2f170155 100644 --- a/paddle/fluid/pir/transforms/gpu/conv2d_add_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/gpu/conv2d_add_fuse_pass.cc @@ -20,8 +20,6 @@ #include "paddle/fluid/pir/drr/include/drr_pattern_base.h" #include "paddle/fluid/pir/utils/general_functions.h" -#include "paddle/pir/include/core/builtin_op.h" -#include "paddle/pir/include/core/value.h" #include "paddle/pir/include/pass/pass.h" #include "paddle/pir/include/pass/pass_registry.h" @@ -89,7 +87,7 @@ class Conv2dAddFusePattern : public paddle::drr::DrrPatternBase { &res.Tensor("filter"), &res.Tensor("bias"), &res.InputNoneTensor()}, - {&res.Tensor("add_out")}); + {&res.Tensor("add_out"), &res.OutputNoneTensor()}); } }; diff --git a/paddle/fluid/pir/transforms/gpu/embedding_eltwise_layernorm_fuse_pass.cc b/paddle/fluid/pir/transforms/gpu/embedding_eltwise_layernorm_fuse_pass.cc index 58409b2fbcb15..97b560e503265 100644 --- a/paddle/fluid/pir/transforms/gpu/embedding_eltwise_layernorm_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/gpu/embedding_eltwise_layernorm_fuse_pass.cc @@ -14,10 +14,12 @@ #include "paddle/fluid/pir/transforms/gpu/embedding_eltwise_layernorm_fuse_pass.h" +#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" #include "paddle/fluid/pir/drr/include/drr_pattern_base.h" #include "paddle/fluid/pir/utils/general_functions.h" -#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" +#include "paddle/phi/common/data_type.h" +#include "paddle/pir/include/core/builtin_type.h" #include "paddle/pir/include/pass/pass.h" #include "paddle/pir/include/pass/pass_registry.h" @@ -32,19 +34,12 @@ class Fused2EmbeddingEltwiseLayernormPattern void operator()(paddle::drr::DrrPatternContext *ctx) const override { paddle::drr::SourcePattern pat = ctx->SourcePattern(); - const auto 
&embedding_1 = pat.Op(paddle::dialect::EmbeddingOp::name(), - {{{"padding_idx", pat.Attr("padding_idx")}, - {"sparse", pat.Attr("sparse")}}}); - const auto &embedding_2 = pat.Op(paddle::dialect::EmbeddingOp::name(), - {{{"padding_idx", pat.Attr("padding_idx")}, - {"sparse", pat.Attr("sparse")}}}); - + const auto &embedding_1 = pat.Op(paddle::dialect::EmbeddingOp::name()); + const auto &embedding_2 = pat.Op(paddle::dialect::EmbeddingOp::name()); const auto &add = pat.Op(paddle::dialect::AddOp::name()); - const auto &layernorm = - pat.Op(paddle::dialect::LayerNormOp::name(), - {{"epsilon", pat.Attr("epsilon")}, - {"begin_norm_axis", pat.Attr("begin_norm_axis")}}); + const auto &layernorm = pat.Op(paddle::dialect::LayerNormOp::name(), + {{"epsilon", pat.Attr("epsilon")}}); embedding_1({&pat.Tensor("x1"), &pat.Tensor("w1")}, {&pat.Tensor("embedding_1_out")}); @@ -57,14 +52,20 @@ class Fused2EmbeddingEltwiseLayernormPattern {&pat.Tensor("layernorm_out"), &pat.Tensor("layernorm_mean"), &pat.Tensor("layernorm_variance")}); - pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + + pat.RequireNativeCall([](const paddle::drr::MatchContext &match_ctx) { + auto w1_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("w1")); + auto w2_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("w2")); + if (w1_dtype != w2_dtype || (!w1_dtype.isa() && + !w1_dtype.isa())) { + return false; + } + auto x1_shape = pir::GetShapeFromValue(match_ctx.Tensor("x1")); auto x2_shape = pir::GetShapeFromValue(match_ctx.Tensor("x2")); - if (x1_shape.size() != x2_shape.size()) { return false; } - for (size_t i = 0; i < x1_shape.size(); i++) { if (x1_shape.at(i) != x2_shape.at(i)) { return false; @@ -76,13 +77,25 @@ class Fused2EmbeddingEltwiseLayernormPattern paddle::drr::ResultPattern res = pat.ResultPattern(); - auto &combine_op_1 = res.Op(pir::CombineOp::name()); + const auto &combine_op_1 = res.Op(pir::CombineOp::name()); combine_op_1({&res.Tensor("x1"), &res.Tensor("x2")}, 
{&res.Tensor("combine1_out")}); - auto &combine_op_2 = res.Op(pir::CombineOp::name()); + const auto &combine_op_2 = res.Op(pir::CombineOp::name()); combine_op_2({&res.Tensor("w1"), &res.Tensor("w2")}, {&res.Tensor("combine2_out")}); + const auto &cast_op_dtype = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> phi::DataType { + auto w1_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("w1")); + return paddle::dialect::TransToPhiDataType(w1_dtype); + }); + const auto &cast_op_1 = + res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}}); + res.Tensor("casted_bias") = cast_op_1(res.Tensor("bias")); + const auto &cast_op_2 = + res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}}); + res.Tensor("casted_scale") = cast_op_2(res.Tensor("scale")); + const auto &fused_embedding_eltwise_layernorm_op = res.Op(paddle::dialect::FusedEmbeddingEltwiseLayernormOp::name(), {{ @@ -90,8 +103,8 @@ class Fused2EmbeddingEltwiseLayernormPattern }}); fused_embedding_eltwise_layernorm_op({&res.Tensor("combine1_out"), &res.Tensor("combine2_out"), - &res.Tensor("bias"), - &res.Tensor("scale")}, + &res.Tensor("casted_bias"), + &res.Tensor("casted_scale")}, {&res.Tensor("layernorm_out")}); } }; @@ -105,21 +118,13 @@ class Fused3EmbeddingEltwiseLayernormPattern void operator()(paddle::drr::DrrPatternContext *ctx) const override { paddle::drr::SourcePattern pat = ctx->SourcePattern(); - const auto &embedding_1 = pat.Op(paddle::dialect::EmbeddingOp::name(), - {{{"padding_idx", pat.Attr("padding_idx")}, - {"sparse", pat.Attr("sparse")}}}); - const auto &embedding_2 = pat.Op(paddle::dialect::EmbeddingOp::name(), - {{{"padding_idx", pat.Attr("padding_idx")}, - {"sparse", pat.Attr("sparse")}}}); - const auto &embedding_3 = pat.Op(paddle::dialect::EmbeddingOp::name(), - {{{"padding_idx", pat.Attr("padding_idx")}, - {"sparse", pat.Attr("sparse")}}}); + const auto &embedding_1 = pat.Op(paddle::dialect::EmbeddingOp::name()); + const auto &embedding_2 = 
pat.Op(paddle::dialect::EmbeddingOp::name()); + const auto &embedding_3 = pat.Op(paddle::dialect::EmbeddingOp::name()); const auto &add1 = pat.Op(paddle::dialect::AddOp::name()); const auto &add2 = pat.Op(paddle::dialect::AddOp::name()); - const auto &layernorm = - pat.Op(paddle::dialect::LayerNormOp::name(), - {{"epsilon", pat.Attr("epsilon")}, - {"begin_norm_axis", pat.Attr("begin_norm_axis")}}); + const auto &layernorm = pat.Op(paddle::dialect::LayerNormOp::name(), + {{"epsilon", pat.Attr("epsilon")}}); embedding_1({&pat.Tensor("x1"), &pat.Tensor("w1")}, {&pat.Tensor("embedding_1_out")}); @@ -136,7 +141,17 @@ class Fused3EmbeddingEltwiseLayernormPattern {&pat.Tensor("layernorm_out"), &pat.Tensor("layernorm_mean"), &pat.Tensor("layernorm_variance")}); - pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + + pat.RequireNativeCall([](const paddle::drr::MatchContext &match_ctx) { + auto w1_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("w1")); + auto w2_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("w2")); + auto w3_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("w3")); + if (w1_dtype != w2_dtype || w1_dtype != w3_dtype || + (!w1_dtype.isa() && + !w1_dtype.isa())) { + return false; + } + auto x1_shape = pir::GetShapeFromValue(match_ctx.Tensor("x1")); auto x2_shape = pir::GetShapeFromValue(match_ctx.Tensor("x2")); auto x3_shape = pir::GetShapeFromValue(match_ctx.Tensor("x3")); @@ -146,7 +161,7 @@ class Fused3EmbeddingEltwiseLayernormPattern } for (size_t i = 0; i < x1_shape.size(); i++) { if (x1_shape.at(i) != x2_shape.at(i) || - x1_shape.at(i) != x2_shape.at(i)) { + x1_shape.at(i) != x3_shape.at(i)) { return false; } } @@ -162,6 +177,18 @@ class Fused3EmbeddingEltwiseLayernormPattern combine_op_2({&res.Tensor("w1"), &res.Tensor("w2"), &res.Tensor("w3")}, {&res.Tensor("combine2_out")}); + const auto &cast_op_dtype = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> phi::DataType { + auto w1_dtype = 
pir::GetDataTypeFromValue(match_ctx.Tensor("w1")); + return paddle::dialect::TransToPhiDataType(w1_dtype); + }); + const auto &cast_op_1 = + res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}}); + res.Tensor("casted_bias") = cast_op_1(res.Tensor("bias")); + const auto &cast_op_2 = + res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}}); + res.Tensor("casted_scale") = cast_op_2(res.Tensor("scale")); + const auto &fused_embedding_eltwise_layernorm_op = res.Op(paddle::dialect::FusedEmbeddingEltwiseLayernormOp::name(), {{ @@ -169,8 +196,8 @@ class Fused3EmbeddingEltwiseLayernormPattern }}); fused_embedding_eltwise_layernorm_op({&res.Tensor("combine1_out"), &res.Tensor("combine2_out"), - &res.Tensor("bias"), - &res.Tensor("scale")}, + &res.Tensor("casted_bias"), + &res.Tensor("casted_scale")}, {&res.Tensor("layernorm_out")}); } }; @@ -193,7 +220,6 @@ class EmbeddingEltwiseLayernormFusePass : public pir::PatternRewritePass { } // namespace namespace pir { - std::unique_ptr CreateFusedEmbeddingEltwiseLayerNormPass() { return std::make_unique(); } diff --git a/paddle/fluid/pir/transforms/gpu/fc_elementwise_layernorm_fuse_pass.cc b/paddle/fluid/pir/transforms/gpu/fc_elementwise_layernorm_fuse_pass.cc index d3e4ed862e741..fa0436d3e5f78 100644 --- a/paddle/fluid/pir/transforms/gpu/fc_elementwise_layernorm_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/gpu/fc_elementwise_layernorm_fuse_pass.cc @@ -36,13 +36,13 @@ class FcElementwiseLayerNormFusePattern : public paddle::drr::DrrPatternBase { { {"in_num_col_dims", pat.Attr("in_num_col_dims")}, {"activation_type", pat.Attr("activation_type")}, - {"padding_weights", pat.Attr("padding_weights")}, }); const auto &add = pat.Op(paddle::dialect::AddOp::name()); const auto &layernorm = pat.Op(paddle::dialect::LayerNormOp::name(), {{"epsilon", pat.Attr("epsilon")}, {"begin_norm_axis", pat.Attr("begin_norm_axis")}}); + fc({&pat.Tensor("x"), &pat.Tensor("w"), &pat.Tensor("bias0")}, {&pat.Tensor("fc_out")}); 
add({&pat.Tensor("fc_out"), &pat.Tensor("y")}, {&pat.Tensor("add_out")}); @@ -51,8 +51,14 @@ class FcElementwiseLayerNormFusePattern : public paddle::drr::DrrPatternBase { {&pat.Tensor("layernorm_out"), &pat.Tensor("layernorm_mean"), &pat.Tensor("layernorm_variance")}); - // Constrains the activation is none + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto x_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("x")); + if (!x_dtype.isa() && + !x_dtype.isa()) { + return false; + } + int64_t layer_norm_x = 1; auto fc_out_dims = pir::GetShapeFromValue(match_ctx.Tensor("fc_out")); auto w_dims = pir::GetShapeFromValue(match_ctx.Tensor("w")); @@ -68,6 +74,18 @@ class FcElementwiseLayerNormFusePattern : public paddle::drr::DrrPatternBase { }); paddle::drr::ResultPattern res = pat.ResultPattern(); + const auto &cast_op_dtype = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> phi::DataType { + auto x_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("x")); + return paddle::dialect::TransToPhiDataType(x_dtype); + }); + const auto &cast_op_1 = + res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}}); + res.Tensor("casted_bias1") = cast_op_1(res.Tensor("bias1")); + const auto &cast_op_2 = + res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}}); + res.Tensor("casted_scale") = cast_op_2(res.Tensor("scale")); + const auto &fused_fc_elementwise_op = res.Op(paddle::dialect::FusedFcElementwiseLayernormOp::name(), {{ @@ -80,8 +98,8 @@ class FcElementwiseLayerNormFusePattern : public paddle::drr::DrrPatternBase { &res.Tensor("w"), &res.Tensor("y"), &res.Tensor("bias0"), - &res.Tensor("scale"), - &res.Tensor("bias1")}, + &res.Tensor("casted_scale"), + &res.Tensor("casted_bias1")}, {&res.Tensor("layernorm_out"), &res.Tensor("layernorm_mean"), &res.Tensor("layernorm_variance")}); diff --git a/paddle/fluid/pir/transforms/gpu/fused_flash_attn_pass.cc b/paddle/fluid/pir/transforms/gpu/fused_flash_attn_pass.cc new 
file mode 100644 index 0000000000000..440aeee5f3ac5 --- /dev/null +++ b/paddle/fluid/pir/transforms/gpu/fused_flash_attn_pass.cc @@ -0,0 +1,489 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/pir/transforms/gpu/fused_flash_attn_pass.h" + +#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" +#include "paddle/fluid/pir/drr/include/drr_pattern_base.h" +#include "paddle/fluid/pir/utils/general_functions.h" + +#include "paddle/pir/include/pass/pass.h" +#include "paddle/pir/include/pass/pass_registry.h" + +namespace { + +class FlashAttnPatternQscale : public paddle::drr::DrrPatternBase { + private: + bool softmax_with_cast_; + + public: + explicit FlashAttnPatternQscale(bool softmax_with_cast) + : softmax_with_cast_(softmax_with_cast) {} + + std::string name() const override { return "FlashAttnPatternQscale"; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern src = ctx->SourcePattern(); + // check the transpose + // q[b, s, head, head_dim] -> transpose -> q[b, head, s, head_dim] -> scale + const auto &transpose_q = src.Op("pd_op.transpose"); + src.Tensor("q_transpose_out") = transpose_q(src.Tensor("q")); + // scale before matmul + const auto &scale_q = src.Op("pd_op.scale"); + const auto &full_scale = + src.Op("pd_op.full", {{"value", src.Attr("scale_q_value")}}); + src.Tensor("q_scale_out") = + 
scale_q(src.Tensor("q_transpose_out"), full_scale()); + // k[b, s, head, head_dim] -> transpose -> k[b, head, s, head_dim] + // k[b, head, s, head_dim] -> transpose -> k[b, head, head_dim, s] + const auto &transpose_k = src.Op("pd_op.transpose"); + src.Tensor("k_transpose_out") = transpose_k(src.Tensor("k")); + const auto &transpose_k2 = src.Op("pd_op.transpose"); + src.Tensor("k_transpose2_out") = + transpose_k2(src.Tensor("k_transpose_out")); + // v[b, s, head, head_dim] -> transpose -> v[b, head, s, head_dim] + const auto &transpose_v = src.Op("pd_op.transpose"); + src.Tensor("v_transpose_out") = transpose_v(src.Tensor("v")); + // qk + const auto &qk_matmul = + src.Op("pd_op.matmul", + {{"transpose_x", src.Attr("matmul_qk_transpose_x")}, + {"transpose_y", src.Attr("matmul_qk_transpose_y")}}); + src.Tensor("qk_out") = + qk_matmul(src.Tensor("q_scale_out"), src.Tensor("k_transpose2_out")); + + // mask + const auto &mask_add = src.Op("pd_op.add"); + src.Tensor("mask_add_out") = + mask_add(src.Tensor("qk_out"), src.Tensor("mask")); + + if (softmax_with_cast_) { + // cast + softmax + cast + const auto &softmax_cast1 = src.Op("pd_op.cast"); + src.Tensor("softmax_cast1_out") = + softmax_cast1(src.Tensor("mask_add_out")); + const auto &softmax = + src.Op("pd_op.softmax", {{"axis", src.Attr("softmax_axis")}}); + src.Tensor("softmax_cast2_in") = softmax(src.Tensor("softmax_cast1_out")); + const auto &softmax_cast2 = src.Op("pd_op.cast"); + src.Tensor("softmax_out") = softmax_cast2(src.Tensor("softmax_cast2_in")); + } else { + // softmax + const auto &softmax = + src.Op("pd_op.softmax", {{"axis", src.Attr("softmax_axis")}}); + src.Tensor("softmax_out") = softmax(src.Tensor("mask_add_out")); + } + + // o + const auto &context_matmul = + src.Op("pd_op.matmul", + {{"transpose_x", src.Attr("context_matmul_transpose_x")}, + {"transpose_y", src.Attr("context_matmul_transpose_y")}}); + src.Tensor("context_matmul_out") = context_matmul( + src.Tensor("softmax_out"), 
src.Tensor("v_transpose_out")); + const auto &o_transpose = src.Op("pd_op.transpose"); + src.Tensor("out") = o_transpose(src.Tensor("context_matmul_out")); + + // Constraints + src.RequireNativeCall( + [](const paddle::drr::MatchContext &match_ctx) -> bool { + auto q_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("q")); + if (!q_dtype.isa() && + !q_dtype.isa()) { + return false; + } + // softmax + const auto &softmax_axis = match_ctx.Attr("softmax_axis"); + if (softmax_axis != -1 && softmax_axis != 3) return false; + // matmul transpose + bool matmul_qk_transpose_x = + match_ctx.Attr("matmul_qk_transpose_x"); + bool matmul_qk_transpose_y = + match_ctx.Attr("matmul_qk_transpose_y"); + if (matmul_qk_transpose_x || matmul_qk_transpose_y) return false; + + bool matmul_o_transpose_x = + match_ctx.Attr("context_matmul_transpose_x"); + bool matmul_o_transpose_y = + match_ctx.Attr("context_matmul_transpose_y"); + if (matmul_o_transpose_x || matmul_o_transpose_y) return false; + // tensor shape + auto q_transpose_out = + pir::GetShapeFromValue(match_ctx.Tensor("q_transpose_out")); + auto k_transpose_out = + pir::GetShapeFromValue(match_ctx.Tensor("k_transpose_out")); + auto v_transpose_out = + pir::GetShapeFromValue(match_ctx.Tensor("v_transpose_out")); + if (q_transpose_out.size() != 4 || k_transpose_out.size() != 4 || + v_transpose_out.size() != 4 || + !(q_transpose_out.at(0) == k_transpose_out.at(0) && + k_transpose_out.at(0) == v_transpose_out.at(0)) || + !(q_transpose_out.at(1) == k_transpose_out.at(1) && + k_transpose_out.at(1) == v_transpose_out.at(1)) || + !(q_transpose_out.at(3) == k_transpose_out.at(3) && + k_transpose_out.at(3) == v_transpose_out.at(3))) { + return false; + } + // mask's shape [bs, 1, seq_len, seq_len] + auto mask_add = pir::GetShapeFromValue(match_ctx.Tensor("mask")); + if (mask_add.size() != 4 || mask_add.at(1) != 1) { + return false; + } + + return true; + }); + + // + // Result Pattern. 
+ // + paddle::drr::ResultPattern res = src.ResultPattern(); + const auto &flash_attn = res.Op("pd_op.flash_attn", + {{{"dropout", res.Float32Attr(0.0)}, + {"causal", res.BoolAttr(false)}, + {"return_softmax", res.BoolAttr(false)}, + {"is_test", res.BoolAttr(true)}, + {"rng_name", res.StrAttr("")}}}); + flash_attn({&res.Tensor("q"), + &res.Tensor("k"), + &res.Tensor("v"), + &res.InputNoneTensor(), + &res.Tensor("mask")}, + {&res.Tensor("out"), + &res.Tensor("softmax"), + &res.Tensor("softmax_lse"), + &res.Tensor("seed_offset")}); + } +}; + +// 1. scale after matmul +// 2. cast before and after softmax +class FlashAttnPatternOutscale : public paddle::drr::DrrPatternBase { + private: + bool softmax_with_cast_; + + public: + explicit FlashAttnPatternOutscale(bool softmax_with_cast) + : softmax_with_cast_(softmax_with_cast) {} + + public: + std::string name() const override { return "FlashAttnPatternOutscale"; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern src = ctx->SourcePattern(); + // check the transpose, + // q[b, s, head, head_dim] -> transpose -> q[b, head, s, head_dim] -> scale + const auto &transpose_q = src.Op("pd_op.transpose"); + src.Tensor("q_transpose_out") = transpose_q(src.Tensor("q")); + // k[b, s, head, head_dim] -> transpose -> k[b, head, s, head_dim] + // k[b, head, s, head_dim] -> transpose -> k[b, head, head_dim, s] + const auto &transpose_k = src.Op("pd_op.transpose"); + src.Tensor("k_transpose_out") = transpose_k(src.Tensor("k")); + const auto &transpose_k2 = src.Op("pd_op.transpose"); + src.Tensor("k_transpose2_out") = + transpose_k2(src.Tensor("k_transpose_out")); + // v[b, s, head, head_dim] -> transpose -> v[b, head, s, head_dim] + const auto &transpose_v = src.Op("pd_op.transpose"); + src.Tensor("v_transpose_out") = transpose_v(src.Tensor("v")); + // qk + const auto &qk_matmul = + src.Op("pd_op.matmul", + {{"transpose_x", src.Attr("matmul_qk_transpose_x")}, + {"transpose_y", 
src.Attr("matmul_qk_transpose_y")}}); + src.Tensor("qk_out") = qk_matmul(src.Tensor("q_transpose_out"), + src.Tensor("k_transpose2_out")); + const auto &scale_out = src.Op("pd_op.scale"); + const auto &full_scale = + src.Op("pd_op.full", {{"value", src.Attr("scale_out_value")}}); + src.Tensor("qk_scale_out") = scale_out(src.Tensor("qk_out"), full_scale()); + + // mask + const auto &mask_add = src.Op("pd_op.add"); + src.Tensor("mask_add_out") = + mask_add(src.Tensor("qk_scale_out"), src.Tensor("mask")); + + if (softmax_with_cast_) { + // cast + softmax + cast + const auto &softmax_cast1 = src.Op("pd_op.cast"); + src.Tensor("softmax_cast1_out") = + softmax_cast1(src.Tensor("mask_add_out")); + const auto &softmax = + src.Op("pd_op.softmax", {{"axis", src.Attr("softmax_axis")}}); + src.Tensor("softmax_cast2_in") = softmax(src.Tensor("softmax_cast1_out")); + const auto &softmax_cast2 = src.Op("pd_op.cast"); + src.Tensor("softmax_out") = softmax_cast2(src.Tensor("softmax_cast2_in")); + } else { + // softmax + const auto &softmax = + src.Op("pd_op.softmax", {{"axis", src.Attr("softmax_axis")}}); + src.Tensor("softmax_out") = softmax(src.Tensor("mask_add_out")); + } + + // o + const auto &context_matmul = + src.Op("pd_op.matmul", + {{"transpose_x", src.Attr("context_matmul_transpose_x")}, + {"transpose_y", src.Attr("context_matmul_transpose_y")}}); + src.Tensor("context_matmul_out") = context_matmul( + src.Tensor("softmax_out"), src.Tensor("v_transpose_out")); + const auto &o_transpose = src.Op("pd_op.transpose"); + src.Tensor("out") = o_transpose(src.Tensor("context_matmul_out")); + + // Constraints + src.RequireNativeCall( + [](const paddle::drr::MatchContext &match_ctx) -> bool { + auto q_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("q")); + if (!q_dtype.isa() && + !q_dtype.isa()) { + return false; + } + // softmax + const auto &softmax_axis = match_ctx.Attr("softmax_axis"); + if (softmax_axis != -1 && softmax_axis != 3) return false; + // matmul transpose + bool 
matmul_qk_transpose_x = + match_ctx.Attr("matmul_qk_transpose_x"); + bool matmul_qk_transpose_y = + match_ctx.Attr("matmul_qk_transpose_y"); + if (matmul_qk_transpose_x || matmul_qk_transpose_y) return false; + + bool matmul_o_transpose_x = + match_ctx.Attr("context_matmul_transpose_x"); + bool matmul_o_transpose_y = + match_ctx.Attr("context_matmul_transpose_y"); + if (matmul_o_transpose_x || matmul_o_transpose_y) return false; + // tensor shape + auto q_transpose_out = + pir::GetShapeFromValue(match_ctx.Tensor("q_transpose_out")); + auto k_transpose_out = + pir::GetShapeFromValue(match_ctx.Tensor("k_transpose_out")); + auto v_transpose_out = + pir::GetShapeFromValue(match_ctx.Tensor("v_transpose_out")); + if (q_transpose_out.size() != 4 || k_transpose_out.size() != 4 || + v_transpose_out.size() != 4 || + !(q_transpose_out.at(0) == k_transpose_out.at(0) && + k_transpose_out.at(0) == v_transpose_out.at(0)) || + !(q_transpose_out.at(1) == k_transpose_out.at(1) && + k_transpose_out.at(1) == v_transpose_out.at(1)) || + !(q_transpose_out.at(3) == k_transpose_out.at(3) && + k_transpose_out.at(3) == v_transpose_out.at(3))) { + return false; + } + // mask's shape [bs, 1, seq_len, seq_len] + auto mask_add = pir::GetShapeFromValue(match_ctx.Tensor("mask")); + if (mask_add.size() != 4 || mask_add.at(1) != 1) { + return false; + } + + return true; + }); + + // + // Result Pattern. 
+ // + paddle::drr::ResultPattern res = src.ResultPattern(); + const auto &flash_attn = res.Op("pd_op.flash_attn", + {{{"dropout", res.Float32Attr(0.0)}, + {"causal", res.BoolAttr(false)}, + {"return_softmax", res.BoolAttr(false)}, + {"is_test", res.BoolAttr(true)}, + {"rng_name", res.StrAttr("")}}}); + flash_attn({&res.Tensor("q"), + &res.Tensor("k"), + &res.Tensor("v"), + &res.InputNoneTensor(), + &res.Tensor("mask")}, + {&res.Tensor("out"), + &res.Tensor("softmax"), + &res.Tensor("softmax_lse"), + &res.Tensor("seed_offset")}); + } +}; + +// slice qkv +class TransposeSliceFlashAttnPattern : public paddle::drr::DrrPatternBase { + public: + std::string name() const override { return "TransposeSliceFlashAttnPattern"; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern src = ctx->SourcePattern(); + // transpose + const auto &transpose_qkv = + src.Op("pd_op.transpose", {{"perm", src.Attr("perm")}}); + src.Tensor("qkv_transpose") = transpose_qkv(src.Tensor("qkv")); + // slice q -> [b, head, s, head_dim] + const auto &slice_q = + src.Op(paddle::dialect::SliceOp::name(), + {{"axes", src.Attr("axes_q")}, + {"infer_flags", src.Attr("infer_flags_q")}, + {"decrease_axis", src.Attr("decrease_axis_q")}}); + const auto &full_int_array_q1 = src.Op("pd_op.full_int_array"); + const auto &full_int_array_q2 = src.Op("pd_op.full_int_array"); + src.Tensor("q") = slice_q( + src.Tensor("qkv_transpose"), full_int_array_q1(), full_int_array_q2()); + // slice k -> [b, head, s, head_dim] + const auto &slice_k = + src.Op(paddle::dialect::SliceOp::name(), + {{"axes", src.Attr("axes_k")}, + {"infer_flags", src.Attr("infer_flags_k")}, + {"decrease_axis", src.Attr("decrease_axis_k")}}); + const auto &full_int_array_k1 = src.Op("pd_op.full_int_array"); + const auto &full_int_array_k2 = src.Op("pd_op.full_int_array"); + src.Tensor("k") = slice_k( + src.Tensor("qkv_transpose"), full_int_array_k1(), full_int_array_k2()); + // slice v -> [b, head, 
s, head_dim] + const auto &slice_v = + src.Op(paddle::dialect::SliceOp::name(), + {{"axes", src.Attr("axes_v")}, + {"infer_flags", src.Attr("infer_flags_v")}, + {"decrease_axis", src.Attr("decrease_axis_v")}}); + const auto &full_int_array_v1 = src.Op("pd_op.full_int_array"); + const auto &full_int_array_v2 = src.Op("pd_op.full_int_array"); + src.Tensor("v") = slice_v( + src.Tensor("qkv_transpose"), full_int_array_v1(), full_int_array_v2()); + + // k[b, head, s, head_dim] -> transpose -> k[b, head, head_dim, s] + const auto &transpose_k = src.Op("pd_op.transpose"); + src.Tensor("k_transpose_out") = transpose_k(src.Tensor("k")); + // qk + const auto &qk_matmul = + src.Op("pd_op.matmul", + {{"transpose_x", src.Attr("matmul_qk_transpose_x")}, + {"transpose_y", src.Attr("matmul_qk_transpose_y")}}); + src.Tensor("qk_out") = + qk_matmul(src.Tensor("q"), src.Tensor("k_transpose_out")); + // scale + const auto &scale_out = src.Op("pd_op.scale"); + const auto &full_scale = + src.Op("pd_op.full", {{"value", src.Attr("scale_out_value")}}); + src.Tensor("qk_scale_out") = scale_out(src.Tensor("qk_out"), full_scale()); + + // mask + const auto &mask_add = src.Op("pd_op.add"); + src.Tensor("mask_add_out") = + mask_add(src.Tensor("qk_scale_out"), src.Tensor("mask")); + + // softmax + const auto &softmax = + src.Op("pd_op.softmax", {{"axis", src.Attr("softmax_axis")}}); + src.Tensor("softmax_out") = softmax(src.Tensor("mask_add_out")); + // o + const auto &context_matmul = + src.Op("pd_op.matmul", + {{"transpose_x", src.Attr("context_matmul_transpose_x")}, + {"transpose_y", src.Attr("context_matmul_transpose_y")}}); + src.Tensor("context_matmul_out") = + context_matmul(src.Tensor("softmax_out"), src.Tensor("v")); + // [b, head, s, head_dim] -> [b, s, head, head_dim] + const auto &o_transpose = src.Op("pd_op.transpose"); + src.Tensor("out") = o_transpose(src.Tensor("context_matmul_out")); + + // Constraints + src.RequireNativeCall( + [](const paddle::drr::MatchContext &match_ctx) -> 
bool { + auto q_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("q")); + if (!q_dtype.isa() && + !q_dtype.isa()) { + return false; + } + // softmax + const auto &softmax_axis = match_ctx.Attr("softmax_axis"); + if (softmax_axis != -1 && softmax_axis != 3) return false; + // matmul transpose + bool matmul_qk_transpose_x = + match_ctx.Attr("matmul_qk_transpose_x"); + bool matmul_qk_transpose_y = + match_ctx.Attr("matmul_qk_transpose_y"); + if (matmul_qk_transpose_x || matmul_qk_transpose_y) return false; + + bool matmul_o_transpose_x = + match_ctx.Attr("context_matmul_transpose_x"); + bool matmul_o_transpose_y = + match_ctx.Attr("context_matmul_transpose_y"); + if (matmul_o_transpose_x || matmul_o_transpose_y) return false; + // tensor shape + auto q = pir::GetShapeFromValue(match_ctx.Tensor("q")); + auto k = pir::GetShapeFromValue(match_ctx.Tensor("k")); + auto v = pir::GetShapeFromValue(match_ctx.Tensor("v")); + if (q.size() != 4 || k.size() != 4 || v.size() != 4 || + !(q.at(0) == k.at(0) && k.at(0) == v.at(0)) || + !(q.at(1) == k.at(1) && k.at(1) == v.at(1)) || + !(q.at(3) == k.at(3) && k.at(3) == v.at(3))) { + return false; + } + // mask's shape [bs, 1, seq_len, seq_len] + auto mask_add = pir::GetShapeFromValue(match_ctx.Tensor("mask")); + if (mask_add.size() != 4 || mask_add.at(1) != 1) { + return false; + } + + return true; + }); + + // + // Result Pattern. 
+ // + paddle::drr::ResultPattern res = src.ResultPattern(); + // [b, head, seq_len, head_dim] -> [b, seq_len, head, head_dim] + const auto &q_transpose = res.Op( + "pd_op.transpose", {{"perm", res.VectorInt32Attr({0, 2, 1, 3})}}); + res.Tensor("q_transpose") = q_transpose(res.Tensor("q")); + const auto &k_transpose = res.Op( + "pd_op.transpose", {{"perm", res.VectorInt32Attr({0, 2, 1, 3})}}); + res.Tensor("k_transpose") = k_transpose(res.Tensor("k")); + const auto &v_transpose = res.Op( + "pd_op.transpose", {{"perm", res.VectorInt32Attr({0, 2, 1, 3})}}); + res.Tensor("v_transpose") = v_transpose(res.Tensor("v")); + + const auto &flash_attn = res.Op("pd_op.flash_attn", + {{{"dropout", res.Float32Attr(0.0)}, + {"causal", res.BoolAttr(false)}, + {"return_softmax", res.BoolAttr(false)}, + {"is_test", res.BoolAttr(true)}, + {"rng_name", res.StrAttr("")}}}); + flash_attn({&res.Tensor("q_transpose"), + &res.Tensor("k_transpose"), + &res.Tensor("v_transpose"), + &res.InputNoneTensor(), + &res.Tensor("mask")}, + {&res.Tensor("out"), + &res.Tensor("softmax"), + &res.Tensor("softmax_lse"), + &res.Tensor("seed_offset")}); + } +}; + +class FusedFlashAttnPass : public pir::PatternRewritePass { + public: + FusedFlashAttnPass() : pir::PatternRewritePass("fused_flash_attn_pass", 2) {} + + pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override { + pir::RewritePatternSet ps(context); + ps.Add(paddle::drr::Create(context, true)); + ps.Add(paddle::drr::Create(context, false)); + ps.Add(paddle::drr::Create(context, true)); + ps.Add(paddle::drr::Create(context, false)); + ps.Add(paddle::drr::Create(context)); + return ps; + } +}; + +} // namespace + +namespace pir { +std::unique_ptr CreateFusedFlashAttnPass() { + return std::make_unique(); +} +} // namespace pir + +REGISTER_IR_PASS(fused_flash_attn_pass, FusedFlashAttnPass); diff --git a/paddle/fluid/pir/transforms/gpu/fused_flash_attn_pass.h b/paddle/fluid/pir/transforms/gpu/fused_flash_attn_pass.h new file mode 
100644 index 0000000000000..14183174760bc --- /dev/null +++ b/paddle/fluid/pir/transforms/gpu/fused_flash_attn_pass.h @@ -0,0 +1,26 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "paddle/pir/include/core/dll_decl.h" + +namespace pir { + +class Pass; + +IR_API std::unique_ptr CreateFusedFlashAttnPass(); + +} // namespace pir diff --git a/paddle/fluid/pir/transforms/gpu/fused_weight_only_linear_pass.cc b/paddle/fluid/pir/transforms/gpu/fused_weight_only_linear_pass.cc index 17bd3f48461e2..db41a0d5cb78a 100644 --- a/paddle/fluid/pir/transforms/gpu/fused_weight_only_linear_pass.cc +++ b/paddle/fluid/pir/transforms/gpu/fused_weight_only_linear_pass.cc @@ -31,7 +31,7 @@ int getSMVersion() { sm_version = paddle::platform::GetGPUComputeCapability( paddle::platform::GetCurrentDeviceId()); #else - PADDLE_THROW(paddle::platform::errors::Unavailable( + PADDLE_THROW(common::errors::Unavailable( "fused_weight_only_linear_pass needs paddle compiled with CUDA.")); #endif return sm_version; @@ -41,10 +41,14 @@ class FusedWeightOnlyLinearWithBiasPattern : public paddle::drr::DrrPatternBase { private: bool reverse_add_; + std::string algo_; + int sm_version_; public: - explicit FusedWeightOnlyLinearWithBiasPattern(bool reverse_add) - : reverse_add_(reverse_add) {} + FusedWeightOnlyLinearWithBiasPattern(bool reverse_add, + const std::string &algo, + int sm_version) + : 
reverse_add_(reverse_add), algo_(algo), sm_version_(sm_version) {} std::string name() const override { return "FusedWeightOnlyLinearWithBiasPattern"; @@ -104,19 +108,49 @@ class FusedWeightOnlyLinearWithBiasPattern // paddle::drr::ResultPattern res = src.ResultPattern(); - const auto &weight_quantize = - res.Op(paddle::dialect::WeightQuantizeOp::name(), - {{"algo", res.StrAttr("weight_only_int8")}, - {"arch", res.Int32Attr(getSMVersion())}, - {"group_size", res.Int32Attr(-1)}}); - weight_quantize({&res.Tensor("w")}, - {&res.Tensor("quanted_weight_tensor"), - &res.Tensor("weight_scale_tensor")}); + if (algo_ == "weight_only_int4") { + // TODO(liuyuanle): When the operator weight_quantize supports + // weight_only_int4 on gpu version, delete the memory copy. + const auto &memcpy_d2h = + res.Op(paddle::dialect::MemcpyD2hOp::name(), + {{"dst_place_type", res.Int32Attr(0 /*cpu*/)}}); + res.Tensor("w_cpu") = memcpy_d2h(res.Tensor("w")); + const auto &weight_quantize = + res.Op(paddle::dialect::WeightQuantizeOp::name(), + {{"algo", res.StrAttr(algo_)}, + {"arch", res.Int32Attr(sm_version_)}, + {"group_size", res.Int32Attr(-1)}}); + weight_quantize({&res.Tensor("w_cpu")}, + {&res.Tensor("quanted_weight_tensor_cpu"), + &res.Tensor("weight_scale_tensor_cpu")}); + + const auto &memcpy_h2d_1 = + res.Op(paddle::dialect::MemcpyH2dOp::name(), + {{"dst_place_type", res.Int32Attr(1 /*gpu*/)}}); + res.Tensor("quanted_weight_tensor") = + memcpy_h2d_1(res.Tensor("quanted_weight_tensor_cpu")); + const auto &memcpy_h2d_2 = + res.Op(paddle::dialect::MemcpyH2dOp::name(), + {{"dst_place_type", res.Int32Attr(1 /*gpu*/)}}); + res.Tensor("weight_scale_tensor") = + memcpy_h2d_2(res.Tensor("weight_scale_tensor_cpu")); + } else { + const auto &weight_quantize = + res.Op(paddle::dialect::WeightQuantizeOp::name(), + {{"algo", res.StrAttr(algo_)}, + {"arch", res.Int32Attr(sm_version_)}, + {"group_size", res.Int32Attr(-1)}}); + + weight_quantize({&res.Tensor("w")}, + 
{&res.Tensor("quanted_weight_tensor"), + &res.Tensor("weight_scale_tensor")}); + } const auto &weight_only_linear = res.Op(paddle::dialect::WeightOnlyLinearOp::name(), - {{"weight_dtype", res.StrAttr("int8")}, - {"arch", res.Int32Attr(getSMVersion())}, + {{"weight_dtype", + res.StrAttr(algo_ == "weight_only_int8" ? "int8" : "int4")}, + {"arch", res.Int32Attr(sm_version_)}, {"group_size", res.Int32Attr(-1)}}); weight_only_linear({&res.Tensor("x"), &res.Tensor("quanted_weight_tensor"), @@ -127,6 +161,14 @@ class FusedWeightOnlyLinearWithBiasPattern }; class FusedWeightOnlyLinearNoBiasPattern : public paddle::drr::DrrPatternBase { + private: + std::string algo_; + int sm_version_; + + public: + FusedWeightOnlyLinearNoBiasPattern(const std::string &algo, int sm_version) + : algo_(algo), sm_version_(sm_version) {} + public: std::string name() const override { return "FusedWeightOnlyLinearNoBiasPattern"; @@ -179,19 +221,48 @@ class FusedWeightOnlyLinearNoBiasPattern : public paddle::drr::DrrPatternBase { // paddle::drr::ResultPattern res = src.ResultPattern(); - const auto &weight_quantize = - res.Op(paddle::dialect::WeightQuantizeOp::name(), - {{"algo", res.StrAttr("weight_only_int8")}, - {"arch", res.Int32Attr(getSMVersion())}, - {"group_size", res.Int32Attr(-1)}}); - weight_quantize({&res.Tensor("w")}, - {&res.Tensor("quanted_weight_tensor"), - &res.Tensor("weight_scale_tensor")}); - + if (algo_ == "weight_only_int4") { + // TODO(liuyuanle): When the operator weight_quantize supports + // weight_only_int4 on gpu version, delete the memory copy. 
+ const auto &memcpy_d2h = + res.Op(paddle::dialect::MemcpyD2hOp::name(), + {{"dst_place_type", res.Int32Attr(0 /*cpu*/)}}); + res.Tensor("w_cpu") = memcpy_d2h(res.Tensor("w")); + const auto &weight_quantize = + res.Op(paddle::dialect::WeightQuantizeOp::name(), + {{"algo", res.StrAttr(algo_)}, + {"arch", res.Int32Attr(sm_version_)}, + {"group_size", res.Int32Attr(-1)}}); + weight_quantize({&res.Tensor("w_cpu")}, + {&res.Tensor("quanted_weight_tensor_cpu"), + &res.Tensor("weight_scale_tensor_cpu")}); + + const auto &memcpy_h2d_1 = + res.Op(paddle::dialect::MemcpyH2dOp::name(), + {{"dst_place_type", res.Int32Attr(1 /*gpu*/)}}); + res.Tensor("quanted_weight_tensor") = + memcpy_h2d_1(res.Tensor("quanted_weight_tensor_cpu")); + const auto &memcpy_h2d_2 = + res.Op(paddle::dialect::MemcpyH2dOp::name(), + {{"dst_place_type", res.Int32Attr(1 /*gpu*/)}}); + res.Tensor("weight_scale_tensor") = + memcpy_h2d_2(res.Tensor("weight_scale_tensor_cpu")); + } else { + const auto &weight_quantize = + res.Op(paddle::dialect::WeightQuantizeOp::name(), + {{"algo", res.StrAttr(algo_)}, + {"arch", res.Int32Attr(sm_version_)}, + {"group_size", res.Int32Attr(-1)}}); + + weight_quantize({&res.Tensor("w")}, + {&res.Tensor("quanted_weight_tensor"), + &res.Tensor("weight_scale_tensor")}); + } const auto &weight_only_linear = res.Op(paddle::dialect::WeightOnlyLinearOp::name(), - {{"weight_dtype", res.StrAttr("int8")}, - {"arch", res.Int32Attr(getSMVersion())}, + {{"weight_dtype", + res.StrAttr(algo_ == "weight_only_int8" ? 
"int8" : "int4")}, + {"arch", res.Int32Attr(sm_version_)}, {"group_size", res.Int32Attr(-1)}}); weight_only_linear({&res.Tensor("x"), &res.Tensor("quanted_weight_tensor"), @@ -204,15 +275,28 @@ class FusedWeightOnlyLinearNoBiasPattern : public paddle::drr::DrrPatternBase { class FusedWeightOnlyLinearPass : public pir::PatternRewritePass { public: FusedWeightOnlyLinearPass() - : pir::PatternRewritePass("fused_weight_only_linear_pass", 4) {} + : pir::PatternRewritePass("fused_weight_only_linear_pass", 4), + sm_version_(getSMVersion()) {} pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override { + std::string algo = "weight_only_int4"; + if (Has("weight_only_algo")) { + algo = Get("weight_only_algo"); + } + PADDLE_ENFORCE_EQ(algo == "weight_only_int8" || algo == "weight_only_int4", + true, + common::errors::InvalidArgument( + "fused_weight_only_linear_pass only support " + "weight_only_int8 or weight_only_int4, but get %s.", + algo)); + pir::RewritePatternSet ps(context); - ps.Add(paddle::drr::Create(context, - true)); - ps.Add(paddle::drr::Create(context, - false)); - ps.Add(paddle::drr::Create(context)); + ps.Add(paddle::drr::Create( + context, true, algo, sm_version_)); + ps.Add(paddle::drr::Create( + context, false, algo, sm_version_)); + ps.Add(paddle::drr::Create( + context, algo, sm_version_)); return ps; } @@ -228,15 +312,15 @@ class FusedWeightOnlyLinearPass : public pir::PatternRewritePass { } bool CanApplyOn(pir::Operation *op) const override { - int sm_version = getSMVersion(); - if (sm_version != 70 && sm_version != 75 && sm_version != 80 && - sm_version != 86) { + if (sm_version_ != 70 && sm_version_ != 75 && sm_version_ != 80 && + sm_version_ != 86) { return false; } return op->num_regions() > 0; } private: + int sm_version_; pir::FrozenRewritePatternSet patterns_; }; diff --git a/paddle/fluid/pir/transforms/onednn/conv_activation_onednn_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/conv_activation_onednn_fuse_pass.cc new file 
mode 100644 index 0000000000000..2c715ab9b437c --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/conv_activation_onednn_fuse_pass.cc @@ -0,0 +1,574 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/pir/transforms/onednn/conv_activation_onednn_fuse_pass.h" + +#include "paddle/fluid/pir/dialect/operator/ir/onednn_op.h" +#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" +#include "paddle/fluid/pir/drr/include/drr_pattern_base.h" +#include "paddle/fluid/pir/utils/general_functions.h" + +#include "paddle/pir/include/pass/pass.h" +#include "paddle/pir/include/pass/pass_registry.h" + +namespace { + +class ConvActivationFusePattern : public paddle::drr::DrrPatternBase { + private: + const size_t activation_count_; + std::string activation_name_; + /* + * fused_level_ = 0 : conv2d + activation + fused_level_ > 0 : conv2d + bias + activation + : conv2d + residual + activation + : conv2d + + bias + residual + activation + */ + const int fused_level_; + + public: + ConvActivationFusePattern(size_t activation_count, + const std::string &activation_name, + int fused_level) + : activation_count_(activation_count), + activation_name_(activation_name), + fused_level_(fused_level) {} + + std::string name() const override { + return "Conv" + std::to_string(fused_level_) + activation_name_ + + "FusePattern"; + } + + uint32_t benefit() const override { return 
activation_count_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + std::string conv_name = paddle::dialect::Conv2dOp::name(); + if (fused_level_ > 0) { + conv_name = paddle::onednn::dialect::FusedConv2dOp::name(); + } + + const auto &conv = + fused_level_ == 0 + ? pat.Op(conv_name, + {{"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}}) + : pat.Op(conv_name, + {{ + {"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"fuse_activation", pat.Attr("fuse_activation")}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"scale_in", pat.Attr("scale_in")}, + {"scale_out", pat.Attr("scale_out")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_weights", pat.Attr("scale_weights")}, + }}); + + std::string activation_name_op = "pd_op." 
+ activation_name_; + if (activation_name_ == "hard_swish") { + // oneDNN use hard_swish, paddle use hardswish + activation_name_op = "pd_op.hardswish"; + } else if (activation_name_ == "hard_sigmoid") { + activation_name_op = "pd_op.hardsigmoid"; + } + + std::unordered_map act_attrs; + if (activation_name_op == paddle::dialect::HardsigmoidOp::name()) { + act_attrs.emplace("slope", pat.Attr("slope")); + act_attrs.emplace("offset", pat.Attr("offset")); + } else if (activation_name_op == paddle::dialect::LeakyReluOp::name()) { + act_attrs.emplace("negative_slope", pat.Attr("negative_slope")); + } else if (activation_name_op == paddle::dialect::GeluOp::name()) { + act_attrs.emplace("approximate", pat.Attr("approximate")); + } + const auto &activation = pat.Op(activation_name_op, act_attrs); + + if (fused_level_ > 0) { + conv({&pat.Tensor("input"), + &pat.Tensor("filter"), + &pat.Tensor("bias"), + &pat.Tensor("residual_param")}, + {&pat.Tensor("conv2d_out")}); + } else { + conv({&pat.Tensor("input"), &pat.Tensor("filter")}, + {&pat.Tensor("conv2d_out")}); + } + pat.Tensor("act_out") = activation(pat.Tensor("conv2d_out")); + + if (fused_level_ > 0) { + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto act_type = match_ctx.Attr("fuse_activation"); + if (act_type != "") { + return false; + } + return true; + }); + } + + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + if (activation_name_ == "leaky_relu") { + float negative_slope = match_ctx.Attr("negative_slope"); + // leaky relu alpha is a positive number + if (negative_slope <= 0.0) { + return false; + } + } + return true; + }); + + paddle::drr::ResultPattern res = pat.ResultPattern(); + auto fuse_beta = res.Float32Attr(0.0f); + auto fuse_alpha = res.Float32Attr(0.0f); + if (activation_name_ == "relu6") { + fuse_beta = res.Float32Attr(6.0f); + } else if (activation_name_ == "hard_swish") { + // hard swish have not attr float threshold = 6.0f, float scale = 6.0f, + 
// float offset = 3.0f attr But in previous implementation hard swish, + // fuse_alpha=1.f / 6.f, fuse_beta=1.f / 2.f, it has fixed + fuse_beta = res.Float32Attr(1.f / 2.f); + fuse_alpha = res.Float32Attr(1.f / 6.f); + } else if (activation_name_ == "swish") { + fuse_alpha = res.Float32Attr(1.0f); + } else if (activation_name_ == "leaky_relu") { + fuse_alpha = pat.Attr("negative_slope"); + } else if (activation_name_ == "hard_sigmoid") { + fuse_alpha = pat.Attr("slope"); + fuse_beta = pat.Attr("offset"); + } + + const auto &fused_conv = + fused_level_ == 0 + ? res.Op(paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"mkldnn_data_type", res.StrAttr("float32")}, + {"fuse_activation", res.StrAttr(activation_name_)}, + {"fuse_residual_connection", res.BoolAttr(false)}, + {"force_fp32_output", res.BoolAttr(false)}, + {"fuse_alpha", fuse_alpha}, + {"fuse_beta", fuse_beta}, + {"scale_in", res.Float32Attr(1.0f)}, + {"scale_out", res.Float32Attr(1.0f)}, + {"scale_in_eltwise", res.Float32Attr(1.0f)}, + {"scale_weights", res.VectorFloatAttr({1.0f})}, + }}) + : res.Op(paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"fuse_activation", res.StrAttr(activation_name_)}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}, + {"fuse_alpha", fuse_alpha}, + {"fuse_beta", fuse_beta}, + {"scale_in", pat.Attr("scale_in")}, + {"scale_out", pat.Attr("scale_out")}, 
+ {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_weights", pat.Attr("scale_weights")}, + }}); + + if (fused_level_ > 0) { + fused_conv({&res.Tensor("input"), + &res.Tensor("filter"), + &res.Tensor("bias"), + &res.Tensor("residual_param")}, + {&res.Tensor("act_out")}); + } else { + fused_conv({&res.Tensor("input"), + &res.Tensor("filter"), + &res.InputNoneTensor(), + &res.InputNoneTensor()}, + {&res.Tensor("act_out")}); + } + } +}; + +class ConvGeluFusePattern : public paddle::drr::DrrPatternBase { + private: + std::string activation_name_; + const int fused_level_; + + public: + ConvGeluFusePattern(const std::string &activation_name, int fused_level) + : activation_name_(activation_name), fused_level_(fused_level) {} + + std::string name() const override { return "ConvGeluFusePattern"; } + + uint32_t benefit() const override { return fused_level_ + 1; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + std::string conv_name = paddle::dialect::Conv2dOp::name(); + if (fused_level_ > 0) { + conv_name = paddle::onednn::dialect::FusedConv2dOp::name(); + } + + const auto &conv = + fused_level_ == 0 + ? 
pat.Op(conv_name, + {{"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}}) + : pat.Op(conv_name, + {{ + {"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"fuse_activation", pat.Attr("fuse_activation")}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"scale_in", pat.Attr("scale_in")}, + {"scale_out", pat.Attr("scale_out")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_weights", pat.Attr("scale_weights")}, + }}); + + const auto &activation = + pat.Op(activation_name_, {{"approximate", pat.Attr("approximate")}}); + if (fused_level_ > 0) { + conv({&pat.Tensor("input"), + &pat.Tensor("filter"), + &pat.Tensor("bias"), + &pat.Tensor("residual_param")}, + {&pat.Tensor("conv2d_out")}); + + } else { + conv({&pat.Tensor("input"), &pat.Tensor("filter")}, + {&pat.Tensor("conv2d_out")}); + } + + pat.Tensor("act_out") = activation(pat.Tensor("conv2d_out")); + + if (fused_level_ > 0) { + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto act_type = match_ctx.Attr("fuse_activation"); + if (act_type != "") { + return false; + } + return true; + }); + } + + paddle::drr::ResultPattern res = pat.ResultPattern(); + const auto &gelu = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> std::string { + bool approximate = match_ctx.Attr("approximate"); + if (approximate) return "gelu_tanh"; + return "gelu_erf"; + }); + auto 
fuse_residual = res.BoolAttr(false); + + const auto &fused_conv = + fused_level_ == 0 + ? res.Op(paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"mkldnn_data_type", res.StrAttr("float32")}, + {"fuse_activation", gelu}, + {"fuse_residual_connection", res.BoolAttr(false)}, + {"force_fp32_output", res.BoolAttr(false)}, + {"fuse_alpha", res.Float32Attr(0.0f)}, + {"fuse_beta", res.Float32Attr(0.0f)}, + {"scale_in", res.Float32Attr(1.0f)}, + {"scale_out", res.Float32Attr(1.0f)}, + {"scale_in_eltwise", res.Float32Attr(1.0f)}, + {"scale_weights", res.VectorFloatAttr({1.0f})}, + }}) + : res.Op(paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"fuse_activation", gelu}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"scale_in", pat.Attr("scale_in")}, + {"scale_out", pat.Attr("scale_out")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_weights", pat.Attr("scale_weights")}, + }}); + + if (fused_level_ > 0) { + fused_conv({&res.Tensor("input"), + &res.Tensor("filter"), + &res.Tensor("bias"), + &res.Tensor("residual_param")}, + {&res.Tensor("act_out")}); + } else { + fused_conv({&res.Tensor("input"), + &res.Tensor("filter"), + &res.InputNoneTensor(), + &res.InputNoneTensor()}, + {&res.Tensor("act_out")}); + } + } +}; + +class ConvClipFusePattern 
: public paddle::drr::DrrPatternBase { + private: + std::string activation_name_; + const int fused_level_; + + public: + ConvClipFusePattern(const std::string &activation_name, int fused_level) + : activation_name_(activation_name), fused_level_(fused_level) {} + + std::string name() const override { return "ConvClipFusePattern"; } + + uint32_t benefit() const override { return fused_level_ + 1; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + std::string conv_name = paddle::dialect::Conv2dOp::name(); + if (fused_level_ > 0) { + conv_name = paddle::onednn::dialect::FusedConv2dOp::name(); + } + + const auto &full_1 = pat.Op(paddle::dialect::FullOp::name(), + {{"value", pat.Attr("full_1_value")}}); + const auto &full_2 = pat.Op(paddle::dialect::FullOp::name(), + {{"value", pat.Attr("full_2_value")}}); + pat.Tensor("min") = full_1(); + pat.Tensor("max") = full_2(); + const auto &conv = + fused_level_ == 0 + ? 
pat.Op(conv_name, + {{"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}}) + : pat.Op(conv_name, + {{ + {"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"fuse_activation", pat.Attr("fuse_activation")}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"scale_in", pat.Attr("scale_in")}, + {"scale_out", pat.Attr("scale_out")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_weights", pat.Attr("scale_weights")}, + }}); + + const auto &activation = pat.Op(activation_name_); + if (fused_level_ > 0) { + conv({&pat.Tensor("input"), + &pat.Tensor("filter"), + &pat.Tensor("bias"), + &pat.Tensor("residual_param")}, + {&pat.Tensor("conv2d_out")}); + + } else { + conv({&pat.Tensor("input"), &pat.Tensor("filter")}, + {&pat.Tensor("conv2d_out")}); + } + pat.Tensor("act_out") = activation( + pat.Tensor("conv2d_out"), pat.Tensor("min"), pat.Tensor("max")); + + if (fused_level_ > 0) { + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto act_type = match_ctx.Attr("fuse_activation"); + if (act_type != "") { + return false; + } + return true; + }); + } + + paddle::drr::ResultPattern res = pat.ResultPattern(); + + const auto &fused_conv = + fused_level_ == 0 + ? 
res.Op(paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"mkldnn_data_type", res.StrAttr("float32")}, + {"fuse_activation", res.StrAttr("clip")}, + {"fuse_residual_connection", res.BoolAttr(false)}, + {"force_fp32_output", res.BoolAttr(false)}, + {"fuse_alpha", pat.Attr("full_1_value")}, + {"fuse_beta", pat.Attr("full_2_value")}, + {"scale_in", res.Float32Attr(1.0f)}, + {"scale_out", res.Float32Attr(1.0f)}, + {"scale_in_eltwise", res.Float32Attr(1.0f)}, + {"scale_weights", res.VectorFloatAttr({1.0f})}, + }}) + : res.Op(paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"fuse_activation", res.StrAttr("clip")}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}, + {"fuse_alpha", pat.Attr("full_1_value")}, + {"fuse_beta", pat.Attr("full_2_value")}, + {"scale_in", pat.Attr("scale_in")}, + {"scale_out", pat.Attr("scale_out")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_weights", pat.Attr("scale_weights")}, + }}); + + if (fused_level_ > 0) { + fused_conv({&res.Tensor("input"), + &res.Tensor("filter"), + &res.Tensor("bias"), + &res.Tensor("residual_param")}, + {&res.Tensor("act_out")}); + } else { + fused_conv({&res.Tensor("input"), + &res.Tensor("filter"), + &res.InputNoneTensor(), + &res.InputNoneTensor()}, + {&res.Tensor("act_out")}); + } + } +}; + +class ConvActFusePass : public pir::PatternRewritePass { + public: + 
ConvActFusePass() + : pir::PatternRewritePass("conv_activation_mkldnn_fuse_pass", 3) {} + + pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override { + pir::RewritePatternSet ps(context); + + // This eleven activations have no extra attribute, can use the same pattern + std::vector supported_activations_name = {"abs", + "sqrt", + "mish", + "relu", + "sigmoid", + "tanh", + "relu6", + "hard_swish", + "swish", + "leaky_relu", + "hard_sigmoid"}; + + size_t pattern_num = 1; + // conv + activation -> fused_conv2d + for (auto activation : supported_activations_name) { + ps.Add(paddle::drr::Create( + context, pattern_num, activation, 0)); + pattern_num++; + } + + // conv + bias(residual / residual + bias) + // -> fused_conv2d + activation -> fused_conv2d + for (auto activation : supported_activations_name) { + ps.Add(paddle::drr::Create( + context, pattern_num, activation, 1)); + pattern_num++; + } + + ps.Add(paddle::drr::Create( + context, paddle::dialect::GeluOp::name(), 0)); + ps.Add(paddle::drr::Create( + context, paddle::dialect::GeluOp::name(), 1)); + + ps.Add(paddle::drr::Create( + context, paddle::dialect::ClipOp::name(), 0)); + ps.Add(paddle::drr::Create( + context, paddle::dialect::ClipOp::name(), 1)); + return ps; + } +}; + +} // namespace + +namespace pir { + +std::unique_ptr CreateConv2dActFusePass() { + /** + * conv + * | -> fused_conv + * activation + * + * fused_conv2d (bias or residual) + * | -> fused_conv2d + * activation + */ + return std::make_unique(); +} + +} // namespace pir + +REGISTER_IR_PASS(conv_activation_onednn_fuse_pass, ConvActFusePass); diff --git a/paddle/fluid/pir/transforms/onednn/conv_activation_onednn_fuse_pass.h b/paddle/fluid/pir/transforms/onednn/conv_activation_onednn_fuse_pass.h new file mode 100644 index 0000000000000..520449bbd028e --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/conv_activation_onednn_fuse_pass.h @@ -0,0 +1,26 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "paddle/pir/include/core/dll_decl.h" + +namespace pir { + +class Pass; + +IR_API std::unique_ptr CreateConv2dActFusePass(); + +} // namespace pir diff --git a/paddle/fluid/pir/transforms/onednn/conv_concat_activation_onednn_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/conv_concat_activation_onednn_fuse_pass.cc new file mode 100644 index 0000000000000..5f2da932bb2af --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/conv_concat_activation_onednn_fuse_pass.cc @@ -0,0 +1,1039 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/pir/transforms/onednn/conv_concat_activation_onednn_fuse_pass.h" + +#include "paddle/fluid/pir/dialect/operator/ir/onednn_op.h" +#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" +#include "paddle/fluid/pir/drr/include/drr_pattern_base.h" + +#include "paddle/pir/include/pass/pass.h" +#include "paddle/pir/include/pass/pass_registry.h" + +namespace { + +class NConvConcatActivationFusePattern : public paddle::drr::DrrPatternBase { + private: + const size_t concat_count_; + std::string activation_name_; + /* + * fused_level_ = 0 : conv2d + activation + fused_level_ = 1 : conv2d + bias + activation + conv2d + residual + activation + conv2d + bias + residual + activation + */ + const int fused_level_; + const int benefit_; + + public: + NConvConcatActivationFusePattern(size_t concat_count, + const std::string &activation_name, + int fused_level, + int benefit) + : concat_count_(concat_count), + activation_name_(activation_name), + fused_level_(fused_level), + benefit_(benefit) {} + + std::string name() const override { + return "Conv" + std::to_string(concat_count_) + "Concat" + "Level" + + std::to_string(fused_level_) + activation_name_ + "Pattern"; + } + + uint32_t benefit() const override { return benefit_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + std::string conv_name = paddle::dialect::Conv2dOp::name(); + if (fused_level_ > 0) { + conv_name = paddle::onednn::dialect::FusedConv2dOp::name(); + } + std::vector combine_in; + for (size_t i = 1; i <= concat_count_; i++) { + const auto &conv = + fused_level_ == 0 + ? 
pat.Op( + conv_name, + {{"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}}) + : pat.Op( + conv_name, + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", + pat.Attr("mkldnn_data_type" + std::to_string(i))}, + {"fuse_activation", + pat.Attr("fuse_activation" + std::to_string(i))}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection" + + std::to_string(i))}, + {"force_fp32_output", + pat.Attr("force_fp32_output" + std::to_string(i))}, + {"fuse_alpha", + pat.Attr("fuse_alpha" + std::to_string(i))}, + {"fuse_beta", + pat.Attr("fuse_beta" + std::to_string(i))}, + {"scale_in", pat.Attr("scale_in" + std::to_string(i))}, + {"scale_out", + pat.Attr("scale_out" + std::to_string(i))}, + {"scale_in_eltwise", + pat.Attr("scale_in_eltwise" + std::to_string(i))}, + {"scale_weights", + pat.Attr("scale_weights" + std::to_string(i))}, + }}); + + if (fused_level_ > 0) { + conv({&pat.Tensor("input" + std::to_string(i)), + &pat.Tensor("filter" + std::to_string(i)), + &pat.Tensor("__@bias" + std::to_string(i) + "@__"), + &pat.Tensor("__@residual" + std::to_string(i) + "@__")}, + {&pat.Tensor("conv2d_out_" + std::to_string(i))}); + + } else { + conv({&pat.Tensor("input" + std::to_string(i)), + &pat.Tensor("filter" + std::to_string(i))}, + {&pat.Tensor("conv2d_out_" + std::to_string(i))}); + } + + 
combine_in.push_back(&pat.Tensor("conv2d_out_" + std::to_string(i))); + } + const auto &combine_op = pat.Op(pir::CombineOp::name()); + const auto &full_op = pat.Op(paddle::dialect::FullOp::name(), + {{"shape", pat.Attr("shape")}, + {"value", pat.Attr("value")}, + {"dtype", pat.Attr("dtype")}, + {"place", pat.Attr("place")}}); + + combine_op(combine_in, {&pat.Tensor("combine_out")}); + const auto &concat_op = pat.Op(paddle::dialect::ConcatOp::name()); + concat_op({&pat.Tensor("combine_out"), &full_op()}, + {&pat.Tensor("concat_out")}); + + std::string activation_name_op = "pd_op." + activation_name_; + if (activation_name_ == "hard_swish") { + // oneDNN use hard_swish, paddle use hardswish + activation_name_op = "pd_op.hardswish"; + } + const auto &activation = + activation_name_op != "pd_op.leaky_relu" + ? pat.Op(activation_name_op) + : pat.Op(activation_name_op, + {{"negative_slope", pat.Attr("negative_slope")}}); + pat.Tensor("activation_out") = activation(pat.Tensor("concat_out")); + + if (fused_level_ > 0) { + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto act_type = match_ctx.Attr("fuse_activation"); + if (act_type != "") { + return false; + } + return true; + }); + } + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + if (activation_name_ == "leaky_relu") { + float negative_slope = match_ctx.Attr("negative_slope"); + // leaky relu alpha is a positive number + if (negative_slope <= 0.0) { + return false; + } + } + return true; + }); + + paddle::drr::ResultPattern res = pat.ResultPattern(); + auto fuse_beta = res.Float32Attr(0.0f); + auto fuse_alpha = res.Float32Attr(0.0f); + if (activation_name_ == "relu6") { + fuse_beta = res.Float32Attr(6.0f); + } else if (activation_name_ == "hard_swish") { + // hard swish have not attr float threshold = 6.0f, float scale = 6.0f, + // float offset = 3.0f attr But in previous implementation hard swish, + // fuse_alpha=1.f / 6.f, fuse_beta=1.f / 2.f, it has fixed + 
fuse_beta = res.Float32Attr(1.f / 2.f); + fuse_alpha = res.Float32Attr(1.f / 6.f); + } else if (activation_name_ == "swish") { + fuse_alpha = res.Float32Attr(1.0f); + } else if (activation_name_ == "leaky_relu") { + fuse_alpha = pat.Attr("negative_slope"); + } + + std::vector combine_result_in; + // int input_num = 1; + for (size_t i = 1; i <= concat_count_; i++) { + const auto &fused_conv = + fused_level_ == 0 + ? res.Op( + paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", res.StrAttr("float32")}, + {"fuse_activation", res.StrAttr(activation_name_)}, + {"fuse_residual_connection", res.BoolAttr(false)}, + {"force_fp32_output", res.BoolAttr(false)}, + {"fuse_alpha", fuse_alpha}, + {"fuse_beta", fuse_beta}, + {"scale_in", res.Float32Attr(1.0f)}, + {"scale_out", res.Float32Attr(1.0f)}, + {"scale_in_eltwise", res.Float32Attr(1.0f)}, + {"scale_weights", res.VectorFloatAttr({1.0f})}, + }}) + : res.Op( + paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", + pat.Attr("mkldnn_data_type" + std::to_string(i))}, + {"fuse_activation", res.StrAttr(activation_name_)}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection" + + std::to_string(i))}, + {"force_fp32_output", + 
pat.Attr("force_fp32_output" + std::to_string(i))}, + {"fuse_alpha", fuse_alpha}, + {"fuse_beta", fuse_beta}, + {"scale_in", pat.Attr("scale_in" + std::to_string(i))}, + {"scale_out", + pat.Attr("scale_out" + std::to_string(i))}, + {"scale_in_eltwise", + pat.Attr("scale_in_eltwise" + std::to_string(i))}, + {"scale_weights", + pat.Attr("scale_weights" + std::to_string(i))}, + }}); + + if (fused_level_ > 0) { + fused_conv({&res.Tensor("input" + std::to_string(i)), + &res.Tensor("filter" + std::to_string(i)), + &res.Tensor("__@bias" + std::to_string(i) + "@__"), + &res.Tensor("__@residual" + std::to_string(i) + "@__")}, + {&res.Tensor("act_out_" + std::to_string(i))}); + + } else { + fused_conv({&res.Tensor("input" + std::to_string(i)), + &res.Tensor("filter" + std::to_string(i)), + &res.InputNoneTensor(), + &res.InputNoneTensor()}, + {&res.Tensor("act_out_" + std::to_string(i))}); + } + combine_result_in.push_back(&res.Tensor("act_out_" + std::to_string(i))); + } + + const auto &combine = res.Op(pir::CombineOp::name()); + + combine(combine_result_in, {&res.Tensor("combine_result_out")}); + + // const auto &concat_result_op = + // res.Op(paddle::dialect::ConcatOp::name(),{{"axis", res.Int32Attr(0) }}); + const auto &full_result_op = res.Op(paddle::dialect::FullOp::name(), + {{"shape", pat.Attr("shape")}, + {"value", pat.Attr("value")}, + {"dtype", pat.Attr("dtype")}, + {"place", pat.Attr("place")}}); + + const auto &concat_result_op = res.Op(paddle::dialect::ConcatOp::name()); + concat_result_op({&res.Tensor("combine_result_out"), &full_result_op()}, + {&res.Tensor("activation_out")}); + + // concat_result_op(combine_result_in, {&res.Tensor("concat_out")}); + } +}; + +class NConvConcatHardSigmoidFusePattern : public paddle::drr::DrrPatternBase { + private: + const size_t concat_count_; + std::string activation_name_; + /* + * fused_level_ = 0 : conv2d + activation + fused_level_ = 1 : conv2d + bias + activation + conv2d + residual + activation + conv2d + bias + 
residual + activation + */ + const int fused_level_; + + public: + NConvConcatHardSigmoidFusePattern(size_t concat_count, + const std::string &activation_name, + int fused_level) + : concat_count_(concat_count), + activation_name_(activation_name), + fused_level_(fused_level) {} + + std::string name() const override { + return "Conv" + std::to_string(concat_count_) + "Concat" + "Level" + + std::to_string(fused_level_) + "HardSigmoidPattern"; + } + + uint32_t benefit() const override { return concat_count_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + std::string conv_name = paddle::dialect::Conv2dOp::name(); + if (fused_level_ > 0) { + conv_name = paddle::onednn::dialect::FusedConv2dOp::name(); + } + std::vector combine_in; + for (size_t i = 1; i <= concat_count_; i++) { + const auto &conv = + fused_level_ == 0 + ? pat.Op( + conv_name, + {{"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}}) + : pat.Op( + conv_name, + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", + pat.Attr("mkldnn_data_type" + std::to_string(i))}, + {"fuse_activation", + pat.Attr("fuse_activation" + std::to_string(i))}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection" + + std::to_string(i))}, + {"force_fp32_output", + 
pat.Attr("force_fp32_output" + std::to_string(i))}, + {"fuse_alpha", + pat.Attr("fuse_alpha" + std::to_string(i))}, + {"fuse_beta", + pat.Attr("fuse_beta" + std::to_string(i))}, + {"scale_in", pat.Attr("scale_in" + std::to_string(i))}, + {"scale_out", + pat.Attr("scale_out" + std::to_string(i))}, + {"scale_in_eltwise", + pat.Attr("scale_in_eltwise" + std::to_string(i))}, + {"scale_weights", + pat.Attr("scale_weights" + std::to_string(i))}, + }}); + + if (fused_level_ > 0) { + conv({&pat.Tensor("input" + std::to_string(i)), + &pat.Tensor("filter" + std::to_string(i)), + &pat.Tensor("__@bias" + std::to_string(i) + "@__"), + &pat.Tensor("__@residual" + std::to_string(i) + "@__")}, + {&pat.Tensor("conv2d_out_" + std::to_string(i))}); + + } else { + conv({&pat.Tensor("input" + std::to_string(i)), + &pat.Tensor("filter" + std::to_string(i))}, + {&pat.Tensor("conv2d_out_" + std::to_string(i))}); + } + + combine_in.push_back(&pat.Tensor("conv2d_out_" + std::to_string(i))); + } + const auto &combine_op = pat.Op(pir::CombineOp::name()); + const auto &full_op = pat.Op(paddle::dialect::FullOp::name(), + {{"shape", pat.Attr("shape")}, + {"value", pat.Attr("value")}, + {"dtype", pat.Attr("dtype")}, + {"place", pat.Attr("place")}}); + + combine_op(combine_in, {&pat.Tensor("combine_out")}); + const auto &concat_op = pat.Op(paddle::dialect::ConcatOp::name()); + concat_op({&pat.Tensor("combine_out"), &full_op()}, + {&pat.Tensor("concat_out")}); + + const auto &activation = + pat.Op(activation_name_, + {{"slope", pat.Attr("slope")}, {"offset", pat.Attr("offset")}}); + pat.Tensor("activation_out") = activation(pat.Tensor("concat_out")); + + if (fused_level_ > 0) { + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto act_type = match_ctx.Attr("fuse_activation"); + if (act_type != "") { + return false; + } + return true; + }); + } + paddle::drr::ResultPattern res = pat.ResultPattern(); + + std::vector combine_result_in; + for (size_t i = 1; i <= 
concat_count_; i++) { + const auto &fused_conv = + fused_level_ == 0 + ? res.Op( + paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", res.StrAttr("float32")}, + {"fuse_activation", res.StrAttr("hard_sigmoid")}, + {"fuse_residual_connection", res.BoolAttr(false)}, + {"force_fp32_output", res.BoolAttr(false)}, + {"fuse_alpha", pat.Attr("slope")}, + {"fuse_beta", pat.Attr("offset")}, + {"scale_in", res.Float32Attr(1.0f)}, + {"scale_out", res.Float32Attr(1.0f)}, + {"scale_in_eltwise", res.Float32Attr(1.0f)}, + {"scale_weights", res.VectorFloatAttr({1.0f})}, + }}) + : res.Op( + paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", + pat.Attr("mkldnn_data_type" + std::to_string(i))}, + {"fuse_activation", res.StrAttr("hard_sigmoid")}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection" + + std::to_string(i))}, + {"force_fp32_output", + pat.Attr("force_fp32_output" + std::to_string(i))}, + {"fuse_alpha", pat.Attr("slope")}, + {"fuse_beta", pat.Attr("offset")}, + {"scale_in", pat.Attr("scale_in" + std::to_string(i))}, + {"scale_out", + pat.Attr("scale_out" + std::to_string(i))}, + {"scale_in_eltwise", + pat.Attr("scale_in_eltwise" + std::to_string(i))}, + {"scale_weights", + 
pat.Attr("scale_weights" + std::to_string(i))}, + }}); + + if (fused_level_ > 0) { + fused_conv({&res.Tensor("input" + std::to_string(i)), + &res.Tensor("filter" + std::to_string(i)), + &res.Tensor("__@bias" + std::to_string(i) + "@__"), + &res.Tensor("__@residual" + std::to_string(i) + "@__")}, + {&res.Tensor("act_out_" + std::to_string(i))}); + + } else { + fused_conv({&res.Tensor("input" + std::to_string(i)), + &res.Tensor("filter" + std::to_string(i)), + &res.InputNoneTensor(), + &res.InputNoneTensor()}, + {&res.Tensor("act_out_" + std::to_string(i))}); + } + combine_result_in.push_back(&res.Tensor("act_out_" + std::to_string(i))); + } + + const auto &combine = res.Op(pir::CombineOp::name()); + + combine(combine_result_in, {&res.Tensor("combine_result_out")}); + + // const auto &concat_result_op = + // res.Op(paddle::dialect::ConcatOp::name(),{{"axis", res.Int32Attr(0) }}); + const auto &full_result_op = res.Op(paddle::dialect::FullOp::name(), + {{"shape", pat.Attr("shape")}, + {"value", pat.Attr("value")}, + {"dtype", pat.Attr("dtype")}, + {"place", pat.Attr("place")}}); + + const auto &concat_result_op = res.Op(paddle::dialect::ConcatOp::name()); + concat_result_op({&res.Tensor("combine_result_out"), &full_result_op()}, + {&res.Tensor("activation_out")}); + + // concat_result_op(combine_result_in, {&res.Tensor("concat_out")}); + } +}; + +class NConvConcatGeluFusePattern : public paddle::drr::DrrPatternBase { + private: + const size_t concat_count_; + std::string activation_name_; + /* + * fused_level_ = 0 : conv2d + activation + fused_level_ = 1 : conv2d + bias + activation + conv2d + residual + activation + conv2d + bias + residual + activation + */ + const int fused_level_; + + public: + NConvConcatGeluFusePattern(size_t concat_count, + const std::string &activation_name, + int fused_level) + : concat_count_(concat_count), + activation_name_(activation_name), + fused_level_(fused_level) {} + + std::string name() const override { + return "Conv" + 
std::to_string(concat_count_) + "Concat" + "Level" + + std::to_string(fused_level_) + "GeluPattern"; + } + + uint32_t benefit() const override { return concat_count_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + std::string conv_name = paddle::dialect::Conv2dOp::name(); + if (fused_level_ > 0) { + conv_name = paddle::onednn::dialect::FusedConv2dOp::name(); + } + std::vector combine_in; + for (size_t i = 1; i <= concat_count_; i++) { + const auto &conv = + fused_level_ == 0 + ? pat.Op( + conv_name, + {{"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}}) + : pat.Op( + conv_name, + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", + pat.Attr("mkldnn_data_type" + std::to_string(i))}, + {"fuse_activation", + pat.Attr("fuse_activation" + std::to_string(i))}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection" + + std::to_string(i))}, + {"force_fp32_output", + pat.Attr("force_fp32_output" + std::to_string(i))}, + {"fuse_alpha", + pat.Attr("fuse_alpha" + std::to_string(i))}, + {"fuse_beta", + pat.Attr("fuse_beta" + std::to_string(i))}, + {"scale_in", pat.Attr("scale_in" + std::to_string(i))}, + {"scale_out", + pat.Attr("scale_out" + std::to_string(i))}, + {"scale_in_eltwise", + pat.Attr("scale_in_eltwise" + 
std::to_string(i))}, + {"scale_weights", + pat.Attr("scale_weights" + std::to_string(i))}, + }}); + + if (fused_level_ > 0) { + conv({&pat.Tensor("input" + std::to_string(i)), + &pat.Tensor("filter" + std::to_string(i)), + &pat.Tensor("__@bias" + std::to_string(i) + "@__"), + &pat.Tensor("__@residual" + std::to_string(i) + "@__")}, + {&pat.Tensor("conv2d_out_" + std::to_string(i))}); + + } else { + conv({&pat.Tensor("input" + std::to_string(i)), + &pat.Tensor("filter" + std::to_string(i))}, + {&pat.Tensor("conv2d_out_" + std::to_string(i))}); + } + + combine_in.push_back(&pat.Tensor("conv2d_out_" + std::to_string(i))); + } + const auto &combine_op = pat.Op(pir::CombineOp::name()); + const auto &full_op = pat.Op(paddle::dialect::FullOp::name(), + {{"shape", pat.Attr("shape")}, + {"value", pat.Attr("value")}, + {"dtype", pat.Attr("dtype")}, + {"place", pat.Attr("place")}}); + + combine_op(combine_in, {&pat.Tensor("combine_out")}); + const auto &concat_op = pat.Op(paddle::dialect::ConcatOp::name()); + concat_op({&pat.Tensor("combine_out"), &full_op()}, + {&pat.Tensor("concat_out")}); + + const auto &activation = + pat.Op(activation_name_, {{"approximate", pat.Attr("approximate")}}); + pat.Tensor("activation_out") = activation(pat.Tensor("concat_out")); + + if (fused_level_ > 0) { + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto act_type = match_ctx.Attr("fuse_activation"); + if (act_type != "") { + return false; + } + return true; + }); + } + paddle::drr::ResultPattern res = pat.ResultPattern(); + + std::vector combine_result_in; + const auto &gelu = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> std::string { + bool approximate = match_ctx.Attr("approximate"); + if (approximate) return "gelu_tanh"; + return "gelu_erf"; + }); + + for (size_t i = 1; i <= concat_count_; i++) { + const auto &fused_conv = + fused_level_ == 0 + ? 
res.Op( + paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", res.StrAttr("float32")}, + {"fuse_activation", gelu}, + {"fuse_residual_connection", res.BoolAttr(false)}, + {"force_fp32_output", res.BoolAttr(false)}, + {"fuse_alpha", res.Float32Attr(0.0f)}, + {"fuse_beta", res.Float32Attr(0.0f)}, + {"scale_in", res.Float32Attr(1.0f)}, + {"scale_out", res.Float32Attr(1.0f)}, + {"scale_in_eltwise", res.Float32Attr(1.0f)}, + {"scale_weights", res.VectorFloatAttr({1.0f})}, + }}) + : res.Op( + paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", + pat.Attr("mkldnn_data_type" + std::to_string(i))}, + {"fuse_activation", gelu}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection" + + std::to_string(i))}, + {"force_fp32_output", + pat.Attr("force_fp32_output" + std::to_string(i))}, + {"fuse_alpha", res.Float32Attr(0.0f)}, + {"fuse_beta", res.Float32Attr(0.0f)}, + {"scale_in", pat.Attr("scale_in" + std::to_string(i))}, + {"scale_out", + pat.Attr("scale_out" + std::to_string(i))}, + {"scale_in_eltwise", + pat.Attr("scale_in_eltwise" + std::to_string(i))}, + {"scale_weights", + pat.Attr("scale_weights" + std::to_string(i))}, + }}); + + if (fused_level_ > 0) { + 
fused_conv({&res.Tensor("input" + std::to_string(i)), + &res.Tensor("filter" + std::to_string(i)), + &res.Tensor("__@bias" + std::to_string(i) + "@__"), + &res.Tensor("__@residual" + std::to_string(i) + "@__")}, + {&res.Tensor("act_out_" + std::to_string(i))}); + + } else { + fused_conv({&res.Tensor("input" + std::to_string(i)), + &res.Tensor("filter" + std::to_string(i)), + &res.InputNoneTensor(), + &res.InputNoneTensor()}, + {&res.Tensor("act_out_" + std::to_string(i))}); + } + combine_result_in.push_back(&res.Tensor("act_out_" + std::to_string(i))); + } + + const auto &combine = res.Op(pir::CombineOp::name()); + + combine(combine_result_in, {&res.Tensor("combine_result_out")}); + + // const auto &concat_result_op = + // res.Op(paddle::dialect::ConcatOp::name(),{{"axis", res.Int32Attr(0) }}); + const auto &full_result_op = res.Op(paddle::dialect::FullOp::name(), + {{"shape", pat.Attr("shape")}, + {"value", pat.Attr("value")}, + {"dtype", pat.Attr("dtype")}, + {"place", pat.Attr("place")}}); + + const auto &concat_result_op = res.Op(paddle::dialect::ConcatOp::name()); + concat_result_op({&res.Tensor("combine_result_out"), &full_result_op()}, + {&res.Tensor("activation_out")}); + + // concat_result_op(combine_result_in, {&res.Tensor("concat_out")}); + } +}; + +class NConvConcatClipFusePattern : public paddle::drr::DrrPatternBase { + private: + const size_t concat_count_; + std::string activation_name_; + /* + * fused_level_ = 0 : conv2d + activation + fused_level_ = 1 : conv2d + bias + activation + conv2d + residual + activation + conv2d + bias + residual + activation + */ + const int fused_level_; + + public: + NConvConcatClipFusePattern(size_t concat_count, + const std::string &activation_name, + int fused_level) + : concat_count_(concat_count), + activation_name_(activation_name), + fused_level_(fused_level) {} + + std::string name() const override { + return "Conv" + std::to_string(concat_count_) + "Concat" + "Level" + + std::to_string(fused_level_) + 
"ClipPattern"; + } + + uint32_t benefit() const override { return concat_count_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + std::string conv_name = paddle::dialect::Conv2dOp::name(); + if (fused_level_ > 0) { + conv_name = paddle::onednn::dialect::FusedConv2dOp::name(); + } + + std::vector combine_in; + for (size_t i = 1; i <= concat_count_; i++) { + const auto &conv = + fused_level_ == 0 + ? pat.Op( + conv_name, + {{"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}}) + : pat.Op( + conv_name, + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", + pat.Attr("mkldnn_data_type" + std::to_string(i))}, + {"fuse_activation", + pat.Attr("fuse_activation" + std::to_string(i))}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection" + + std::to_string(i))}, + {"force_fp32_output", + pat.Attr("force_fp32_output" + std::to_string(i))}, + {"fuse_alpha", + pat.Attr("fuse_alpha" + std::to_string(i))}, + {"fuse_beta", + pat.Attr("fuse_beta" + std::to_string(i))}, + {"scale_in", pat.Attr("scale_in" + std::to_string(i))}, + {"scale_out", + pat.Attr("scale_out" + std::to_string(i))}, + {"scale_in_eltwise", + pat.Attr("scale_in_eltwise" + std::to_string(i))}, + {"scale_weights", + pat.Attr("scale_weights" + 
std::to_string(i))}, + }}); + + if (fused_level_ > 0) { + conv({&pat.Tensor("input" + std::to_string(i)), + &pat.Tensor("filter" + std::to_string(i)), + &pat.Tensor("__@bias" + std::to_string(i) + "@__"), + &pat.Tensor("__@residual" + std::to_string(i) + "@__")}, + {&pat.Tensor("conv2d_out_" + std::to_string(i))}); + + } else { + conv({&pat.Tensor("input" + std::to_string(i)), + &pat.Tensor("filter" + std::to_string(i))}, + {&pat.Tensor("conv2d_out_" + std::to_string(i))}); + } + + combine_in.push_back(&pat.Tensor("conv2d_out_" + std::to_string(i))); + } + const auto &combine_op = pat.Op(pir::CombineOp::name()); + const auto &full_op = pat.Op(paddle::dialect::FullOp::name(), + {{"shape", pat.Attr("shape")}, + {"value", pat.Attr("value")}, + {"dtype", pat.Attr("dtype")}, + {"place", pat.Attr("place")}}); + + combine_op(combine_in, {&pat.Tensor("combine_out")}); + const auto &concat_op = pat.Op(paddle::dialect::ConcatOp::name()); + concat_op({&pat.Tensor("combine_out"), &full_op()}, + {&pat.Tensor("concat_out")}); + + const auto &full_1 = pat.Op(paddle::dialect::FullOp::name(), + {{"value", pat.Attr("full_1_value")}}); + const auto &full_2 = pat.Op(paddle::dialect::FullOp::name(), + {{"value", pat.Attr("full_2_value")}}); + pat.Tensor("min") = full_1(); + pat.Tensor("max") = full_2(); + + const auto &activation = pat.Op(activation_name_); + + pat.Tensor("activation_out") = activation( + pat.Tensor("concat_out"), pat.Tensor("min"), pat.Tensor("max")); + + if (fused_level_ > 0) { + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto act_type = match_ctx.Attr("fuse_activation"); + if (act_type != "") { + return false; + } + return true; + }); + } + paddle::drr::ResultPattern res = pat.ResultPattern(); + + std::vector combine_result_in; + for (size_t i = 1; i <= concat_count_; i++) { + const auto &fused_conv = + fused_level_ == 0 + ? 
res.Op( + paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", res.StrAttr("float32")}, + {"fuse_activation", res.StrAttr("clip")}, + {"fuse_residual_connection", res.BoolAttr(false)}, + {"force_fp32_output", res.BoolAttr(false)}, + {"fuse_alpha", pat.Attr("full_1_value")}, + {"fuse_beta", pat.Attr("full_2_value")}, + {"scale_in", res.Float32Attr(1.0f)}, + {"scale_out", res.Float32Attr(1.0f)}, + {"scale_in_eltwise", res.Float32Attr(1.0f)}, + {"scale_weights", res.VectorFloatAttr({1.0f})}, + }}) + : res.Op( + paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", + pat.Attr("mkldnn_data_type" + std::to_string(i))}, + {"fuse_activation", res.StrAttr("clip")}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection" + + std::to_string(i))}, + {"force_fp32_output", + pat.Attr("force_fp32_output" + std::to_string(i))}, + {"fuse_alpha", pat.Attr("full_1_value")}, + {"fuse_beta", pat.Attr("full_2_value")}, + {"scale_in", pat.Attr("scale_in" + std::to_string(i))}, + {"scale_out", + pat.Attr("scale_out" + std::to_string(i))}, + {"scale_in_eltwise", + pat.Attr("scale_in_eltwise" + std::to_string(i))}, + {"scale_weights", + pat.Attr("scale_weights" + std::to_string(i))}, + }}); + + if 
(fused_level_ > 0) { + fused_conv({&res.Tensor("input" + std::to_string(i)), + &res.Tensor("filter" + std::to_string(i)), + &res.Tensor("__@bias" + std::to_string(i) + "@__"), + &res.Tensor("__@residual" + std::to_string(i) + "@__")}, + {&res.Tensor("act_out_" + std::to_string(i))}); + + } else { + fused_conv({&res.Tensor("input" + std::to_string(i)), + &res.Tensor("filter" + std::to_string(i)), + &res.InputNoneTensor(), + &res.InputNoneTensor()}, + {&res.Tensor("act_out_" + std::to_string(i))}); + } + combine_result_in.push_back(&res.Tensor("act_out_" + std::to_string(i))); + } + + const auto &combine = res.Op(pir::CombineOp::name()); + + combine(combine_result_in, {&res.Tensor("combine_result_out")}); + + // const auto &concat_result_op = + // res.Op(paddle::dialect::ConcatOp::name(),{{"axis", res.Int32Attr(0) }}); + const auto &full_result_op = res.Op(paddle::dialect::FullOp::name(), + {{"shape", pat.Attr("shape")}, + {"value", pat.Attr("value")}, + {"dtype", pat.Attr("dtype")}, + {"place", pat.Attr("place")}}); + + const auto &concat_result_op = res.Op(paddle::dialect::ConcatOp::name()); + concat_result_op({&res.Tensor("combine_result_out"), &full_result_op()}, + {&res.Tensor("activation_out")}); + + // concat_result_op(combine_result_in, {&res.Tensor("concat_out")}); + } +}; + +class ConvConcatActFusePass : public pir::PatternRewritePass { + public: + ConvConcatActFusePass() + : pir::PatternRewritePass("conv_concat_activation_mkldnn_fuse_pass", 3) {} + + pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override { + pir::RewritePatternSet ps(context); + std::vector supported_activations_name = {"abs", + "sqrt", + "mish", + "relu", + "sigmoid", + "tanh", + "relu6", + "hard_swish", + "swish", + "leaky_relu"}; + int benefit = 1; + /** + * To avoid many for loop patterns to reduce efficiency + * We just support 6 conv2d concat now + * And concat in OneDNN with a large number of concat ops + * performance is worse than CPU kernel. 
+ */ + /** + * fused_level 0: conv2d + activation + * 1: fused_conv2d + activation + */ + for (size_t concat_num = 1; concat_num <= 6; concat_num++) { + for (auto activation : supported_activations_name) { + ps.Add(paddle::drr::Create( + context, concat_num, activation, 0, benefit++)); + ps.Add(paddle::drr::Create( + context, concat_num, activation, 1, benefit++)); + } + } + + /** + * These activations use separate patterns to avoid an overly large benefit value + */ + for (size_t concat_num = 1; concat_num <= 6; concat_num++) { + ps.Add(paddle::drr::Create( + context, concat_num, paddle::dialect::HardsigmoidOp::name(), 0)); + ps.Add(paddle::drr::Create( + context, concat_num, paddle::dialect::HardsigmoidOp::name(), 1)); + } + + for (size_t concat_num = 1; concat_num <= 6; concat_num++) { + ps.Add(paddle::drr::Create( + context, concat_num, paddle::dialect::GeluOp::name(), 0)); + ps.Add(paddle::drr::Create( + context, concat_num, paddle::dialect::GeluOp::name(), 1)); + } + + for (size_t concat_num = 1; concat_num <= 6; concat_num++) { + ps.Add(paddle::drr::Create( + context, concat_num, paddle::dialect::ClipOp::name(), 0)); + ps.Add(paddle::drr::Create( + context, concat_num, paddle::dialect::ClipOp::name(), 1)); + } + + return ps; + } +}; + +} // namespace + +namespace pir { + +std::unique_ptr CreateConv2dConcatActFusePass() { + // /** + // * This pass must execute before conv_activation_mkldnn_fuse_pass + // * conv conv conv conv conv conv fused_conv + // fused_conv fused_conv + // * \ / ... | | ... \ / + // ..... 
+ // * concat -> act act -> concat + // * | \ / + // * act concat + // */ + return std::make_unique(); +} + +} // namespace pir + +REGISTER_IR_PASS(conv_concat_activation_onednn_fuse_pass, + ConvConcatActFusePass); diff --git a/paddle/fluid/pir/transforms/onednn/conv_concat_activation_onednn_fuse_pass.h b/paddle/fluid/pir/transforms/onednn/conv_concat_activation_onednn_fuse_pass.h new file mode 100644 index 0000000000000..972d594569684 --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/conv_concat_activation_onednn_fuse_pass.h @@ -0,0 +1,26 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include "paddle/pir/include/core/dll_decl.h" + +namespace pir { + +class Pass; + +IR_API std::unique_ptr CreateConv2dConcatActFusePass(); + +} // namespace pir diff --git a/paddle/fluid/pir/transforms/onednn/conv_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/conv_elementwise_add_onednn_fuse_pass.cc similarity index 99% rename from paddle/fluid/pir/transforms/onednn/conv_elementwise_add_mkldnn_fuse_pass.cc rename to paddle/fluid/pir/transforms/onednn/conv_elementwise_add_onednn_fuse_pass.cc index 4ecd752b85997..c367712927dcc 100644 --- a/paddle/fluid/pir/transforms/onednn/conv_elementwise_add_mkldnn_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/onednn/conv_elementwise_add_onednn_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/pir/transforms/onednn/conv_elementwise_add_mkldnn_fuse_pass.h" +#include "paddle/fluid/pir/transforms/onednn/conv_elementwise_add_onednn_fuse_pass.h" #include "paddle/fluid/pir/dialect/operator/ir/onednn_op.h" #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" @@ -385,7 +385,7 @@ class FusedConvBiasElementwiseAddAsYPattern class ConvElementwiseAddFusePass : public pir::PatternRewritePass { public: ConvElementwiseAddFusePass() - : pir::PatternRewritePass("conv_elementwise_add_mkldnn_fuse_pass", 3) {} + : pir::PatternRewritePass("conv_elementwise_add_onednn_fuse_pass", 3) {} pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override { pir::RewritePatternSet ps(context); @@ -421,5 +421,5 @@ std::unique_ptr CreateConvElementwiseAddFusePass() { } // namespace pir -REGISTER_IR_PASS(conv_elementwise_add_mkldnn_fuse_pass, +REGISTER_IR_PASS(conv_elementwise_add_onednn_fuse_pass, ConvElementwiseAddFusePass); diff --git a/paddle/fluid/pir/transforms/onednn/conv_elementwise_add_mkldnn_fuse_pass.h 
b/paddle/fluid/pir/transforms/onednn/conv_elementwise_add_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/pir/transforms/onednn/conv_elementwise_add_mkldnn_fuse_pass.h rename to paddle/fluid/pir/transforms/onednn/conv_elementwise_add_onednn_fuse_pass.h diff --git a/paddle/fluid/pir/transforms/onednn/depthwise_conv_onednn_pass.cc b/paddle/fluid/pir/transforms/onednn/depthwise_conv_onednn_pass.cc new file mode 100644 index 0000000000000..5b89ac9a1f0f7 --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/depthwise_conv_onednn_pass.cc @@ -0,0 +1,107 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/pir/transforms/onednn/depthwise_conv_onednn_pass.h" + +#include "paddle/fluid/pir/dialect/operator/ir/onednn_op.h" +#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" +#include "paddle/fluid/pir/drr/include/drr_pattern_base.h" + +#include "paddle/pir/include/pass/pass.h" +#include "paddle/pir/include/pass/pass_registry.h" + +namespace { + +class DepthwiseConvPattern : public paddle::drr::DrrPatternBase { + private: + std::string depthwise_conv_name_; + + public: + explicit DepthwiseConvPattern(const std::string &conv_name) + : depthwise_conv_name_(conv_name) {} + + std::string name() const override { return "DepthwiseConvPattern"; } + + uint32_t benefit() const override { return 2; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + + const auto &depthwise_conv = + pat.Op(depthwise_conv_name_, + {{"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}}); + + depthwise_conv({&pat.Tensor("input"), &pat.Tensor("filter")}, + {&pat.Tensor("conv_out")}); + + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + std::set padding_algorithm = {"EXPLICIT", "SAME", "VALID"}; + std::set data_format = {"NCHW", "NHWC", "AnyLayout"}; + if (padding_algorithm.count( + match_ctx.Attr("padding_algorithm")) == 0 || + data_format.count(match_ctx.Attr("data_format")) == 0 || + match_ctx.Attr("groups") < 1) { + return false; + } + return true; + }); + + paddle::drr::ResultPattern res = pat.ResultPattern(); + + const auto &conv2d = + res.Op(paddle::dialect::Conv2dOp::name(), + {{ + {"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", 
pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + }}); + + conv2d({&res.Tensor("input"), &res.Tensor("filter")}, + {&res.Tensor("conv_out")}); + } +}; + +class DepthwiseConvMKLDNNPass : public pir::PatternRewritePass { + public: + DepthwiseConvMKLDNNPass() + : pir::PatternRewritePass("depthwise_conv_mkldnn_pass", 2) {} + + pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override { + pir::RewritePatternSet ps(context); + ps.Add(paddle::drr::Create( + context, paddle::dialect::DepthwiseConv2dOp::name())); + return ps; + } +}; + +} // namespace + +namespace pir { + +std::unique_ptr CreateDepthwiseConvMKLDNNPass() { + // pd_op.depthwise_conv -> pd_op.conv2d + return std::make_unique(); +} + +} // namespace pir + +REGISTER_IR_PASS(depthwise_conv_onednn_pass, DepthwiseConvMKLDNNPass); diff --git a/paddle/fluid/pir/transforms/onednn/depthwise_conv_onednn_pass.h b/paddle/fluid/pir/transforms/onednn/depthwise_conv_onednn_pass.h new file mode 100644 index 0000000000000..9f91993ce8dbe --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/depthwise_conv_onednn_pass.h @@ -0,0 +1,26 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include "paddle/pir/include/core/dll_decl.h" + +namespace pir { + +class Pass; + +IR_API std::unique_ptr CreateDepthwiseConvMKLDNNPass(); + +} // namespace pir diff --git a/paddle/fluid/pir/transforms/onednn/matmul_activation_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/matmul_activation_fuse_pass.cc index 1db28281578d4..45f182c955f16 100644 --- a/paddle/fluid/pir/transforms/onednn/matmul_activation_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/onednn/matmul_activation_fuse_pass.cc @@ -92,16 +92,6 @@ class MatmulActivationFusePattern : public paddle::drr::DrrPatternBase { pat.Tensor("act_out") = act(pat.Tensor("Out")); - pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { - std::set bool_sets = {true, false}; - auto result_x = match_ctx.Attr("transpose_x"); - auto result_y = match_ctx.Attr("transpose_y"); - if (bool_sets.count(result_x) == 0 || bool_sets.count(result_y) == 0) { - return false; - } - return true; - }); - if (act_type_ == paddle::dialect::GeluOp::name()) { pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { auto result_gelu = match_ctx.Attr("approximate"); @@ -187,15 +177,6 @@ class MatmulGeluTanhFusePattern : public paddle::drr::DrrPatternBase { pat.Tensor("act_out") = act(pat.Tensor("Out")); - pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { - std::set bool_sets = {true, false}; - auto result_x = match_ctx.Attr("transpose_x"); - auto result_y = match_ctx.Attr("transpose_y"); - if (bool_sets.count(result_x) == 0 || bool_sets.count(result_y) == 0) { - return false; - } - return true; - }); pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { auto result_gelu = match_ctx.Attr("approximate"); if (!result_gelu) return false; @@ -272,16 +253,6 @@ class MatmulClipFusePattern : public paddle::drr::DrrPatternBase { pat.Tensor("act_out") = act(pat.Tensor("Out"), pat.Tensor("min"), pat.Tensor("max")); - pat.RequireNativeCall([&](const 
paddle::drr::MatchContext &match_ctx) { - std::set bool_sets = {true, false}; - auto result_x = match_ctx.Attr("transpose_x"); - auto result_y = match_ctx.Attr("transpose_y"); - if (bool_sets.count(result_x) == 0 || bool_sets.count(result_y) == 0) { - return false; - } - return true; - }); - paddle::drr::ResultPattern res = pat.ResultPattern(); std::unordered_map fused_attrs{ @@ -375,16 +346,11 @@ class FusedMatmulActivationFusePattern : public paddle::drr::DrrPatternBase { pat.Tensor("act_out") = act(pat.Tensor("Out")); pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { - std::set bool_sets = {true, false}; - auto result_x = match_ctx.Attr("transpose_x"); - auto result_y = match_ctx.Attr("transpose_y"); auto act_type = match_ctx.Attr("fuse_activation"); - if (bool_sets.count(result_x) == 0 || bool_sets.count(result_y) == 0 || - act_type != "") { - return false; - } + if (act_type != "") return false; return true; }); + if (act_type_ == paddle::dialect::GeluOp::name()) { pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { auto result_gelu = match_ctx.Attr("approximate"); @@ -490,16 +456,11 @@ class FusedMatmulGeluTanhFusePattern : public paddle::drr::DrrPatternBase { pat.Tensor("act_out") = act(pat.Tensor("Out")); pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { - std::set bool_sets = {true, false}; - auto result_x = match_ctx.Attr("transpose_x"); - auto result_y = match_ctx.Attr("transpose_y"); auto act_type = match_ctx.Attr("fuse_activation"); - if (bool_sets.count(result_x) == 0 || bool_sets.count(result_y) == 0 || - act_type != "") { - return false; - } + if (act_type != "") return false; return true; }); + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { auto result_gelu = match_ctx.Attr("approximate"); if (!result_gelu) return false; @@ -597,14 +558,8 @@ class FusedMatmulClipFusePattern : public paddle::drr::DrrPatternBase { act(pat.Tensor("Out"), pat.Tensor("min"), 
pat.Tensor("max")); pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { - std::set bool_sets = {true, false}; - auto result_x = match_ctx.Attr("transpose_x"); - auto result_y = match_ctx.Attr("transpose_y"); auto act_type = match_ctx.Attr("fuse_activation"); - if (bool_sets.count(result_x) == 0 || bool_sets.count(result_y) == 0 || - act_type != "") { - return false; - } + if (act_type != "") return false; return true; }); @@ -645,7 +600,6 @@ class MatmulActivationFusePass : public pir::PatternRewritePass { pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override { pir::RewritePatternSet ps(context); - // std::vector bool_set = {false, true}; int benefit_idx = 1; for (auto act_op : act_ops) { ps.Add(paddle::drr::Create( diff --git a/paddle/fluid/pir/transforms/onednn/matmul_elementwise_add_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/matmul_elementwise_add_fuse_pass.cc index 68354c52e2fe5..91ce0f80018c5 100644 --- a/paddle/fluid/pir/transforms/onednn/matmul_elementwise_add_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/onednn/matmul_elementwise_add_fuse_pass.cc @@ -59,16 +59,6 @@ class MatmulElementwiseAddFusePattern : public paddle::drr::DrrPatternBase { as_x_ ? add(pat.Tensor("Out"), pat.Tensor("residual")) : add(pat.Tensor("residual"), pat.Tensor("Out")); - pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { - std::set bool_sets = {true, false}; - auto result_x = match_ctx.Attr("transpose_x"); - auto result_y = match_ctx.Attr("transpose_y"); - if (bool_sets.count(result_x) == 0 || bool_sets.count(result_y) == 0) { - return false; - } - return true; - }); - paddle::drr::ResultPattern res = pat.ResultPattern(); const auto &fused_matmul = @@ -157,16 +147,6 @@ class FusedMatmulElementwiseAddFusePattern as_x_ ? 
add(pat.Tensor("Out"), pat.Tensor("residual")) : add(pat.Tensor("residual"), pat.Tensor("Out")); - pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { - std::set bool_sets = {true, false}; - auto result_x = match_ctx.Attr("transpose_x"); - auto result_y = match_ctx.Attr("transpose_y"); - if (bool_sets.count(result_x) == 0 || bool_sets.count(result_y) == 0) { - return false; - } - return true; - }); - pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { auto none_tensor = match_ctx.Tensor("none"); if (none_tensor.impl() != nullptr) { diff --git a/paddle/fluid/pir/transforms/onednn/matmul_transpose_reshape_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/matmul_transpose_reshape_fuse_pass.cc new file mode 100644 index 0000000000000..246cde678593c --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/matmul_transpose_reshape_fuse_pass.cc @@ -0,0 +1,271 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/pir/transforms/onednn/matmul_transpose_reshape_fuse_pass.h" + +#include "paddle/fluid/pir/dialect/operator/ir/onednn_op.h" +#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" +#include "paddle/fluid/pir/drr/include/drr_pattern_base.h" + +#include "paddle/pir/include/pass/pass.h" +#include "paddle/pir/include/pass/pass_registry.h" + +namespace { +class MatmulTransposeReshapeFusePattern : public paddle::drr::DrrPatternBase { + private: + std::string matmul_name_; + std::string fused_matmul_name_; + uint32_t benefit_; + + public: + MatmulTransposeReshapeFusePattern(const std::string &matmul_name, + const std::string &fused_matmul_name, + uint32_t benefit) + : matmul_name_(matmul_name), + fused_matmul_name_(fused_matmul_name), + benefit_(benefit) {} + + std::string name() const override { + return "MatmulTransposeReshapeFusePattern"; + } + + uint32_t benefit() const override { return benefit_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + + const auto &matmul = pat.Op(matmul_name_, + {{"transpose_x", pat.Attr("transpose_x")}, + {"transpose_y", pat.Attr("transpose_y")}}); + matmul({&pat.Tensor("X"), &pat.Tensor("Y")}, {&pat.Tensor("Out")}); + + const auto &transpose = pat.Op(paddle::dialect::TransposeOp::name(), + {{"perm", pat.Attr("perm")}}); + pat.Tensor("transpose_out") = transpose(pat.Tensor("Out")); + + const auto &full_int_array = pat.Op(paddle::dialect::FullIntArrayOp::name(), + {{"value", pat.Attr("int_array")}}); + pat.Tensor("shape") = full_int_array(); + + const auto &reshape = pat.Op(paddle::dialect::ReshapeOp::name()); + reshape({&pat.Tensor("transpose_out"), &pat.Tensor("shape")}, + {&pat.Tensor("reshape_out"), &pat.Tensor("Xshape")}); + + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto shape = match_ctx.Attr>("int_array"); + auto perm = match_ctx.Attr>("perm"); + const std::vector supported_axis{0, 2, 1, 
3}; + if (perm != supported_axis) return false; + if (shape.size() != 3) return false; + if (std::count(shape.begin(), shape.end(), -1) > 1) return false; + return true; + }); + + paddle::drr::ResultPattern res = pat.ResultPattern(); + + std::unordered_map fused_attrs{ + {"trans_x", pat.Attr("transpose_x")}, + {"trans_y", pat.Attr("transpose_y")}, + {"matmul_alpha", res.Float32Attr(1.0f)}, + {"fuse_activation", res.StrAttr("")}, + {"fuse_alpha", res.Float32Attr(0.0f)}, + {"fuse_beta", res.Float32Attr(0.0f)}, + {"fused_output_scale", res.Float32Attr(1.0f)}, + {"fused_reshape_x", res.VectorInt32Attr({})}, + {"fused_transpose_x", res.VectorInt32Attr({})}, + {"fused_reshape_y", res.VectorInt32Attr({})}, + {"fused_transpose_y", res.VectorInt32Attr({})}, + {"mkldnn_data_type", res.StrAttr("float32")}, + {"scale_x", res.Float32Attr(1.0f)}, + {"scale_y", res.Float32Attr(1.0f)}, + {"scale_in_eltwise", res.Float32Attr(0.0f)}, + {"scale_out", res.Float32Attr(1.0f)}, + {"force_fp32_output", res.BoolAttr(false)}}; + + const auto &fused_reshape_attr = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> std::vector { + std::vector int_array_value; + auto shape = match_ctx.Attr>("int_array"); + for (auto i : shape) { + int_array_value.emplace_back(static_cast(i)); + } + return int_array_value; + }); + + fused_attrs.emplace("fused_reshape_out", fused_reshape_attr); + fused_attrs.emplace("fused_transpose_out", pat.Attr("perm")); + + const auto &fused_matmul = res.Op(fused_matmul_name_, fused_attrs); + + fused_matmul({&res.Tensor("X"), &res.Tensor("Y"), &res.InputNoneTensor()}, + {&res.Tensor("reshape_out")}); + } +}; + +class FusedMatmulTransposeReshapeFusePattern + : public paddle::drr::DrrPatternBase { + private: + std::string matmul_name_; + std::string fused_matmul_name_; + uint32_t benefit_; + + public: + FusedMatmulTransposeReshapeFusePattern(const std::string &matmul_name, + const std::string &fused_matmul_name, + uint32_t benefit) + : 
matmul_name_(matmul_name), + fused_matmul_name_(fused_matmul_name), + benefit_(benefit) {} + + std::string name() const override { + return "FusedMatmulTransposeReshapeFusePattern"; + } + + uint32_t benefit() const override { return benefit_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + + const auto &matmul = + pat.Op(matmul_name_, + {{"trans_x", pat.Attr("transpose_x")}, + {"trans_y", pat.Attr("transpose_y")}, + {"matmul_alpha", pat.Attr("matmul_alpha")}, + {"fuse_activation", pat.Attr("fuse_activation")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"fused_output_scale", pat.Attr("fused_output_scale")}, + {"fused_reshape_x", pat.Attr("fused_reshape_x")}, + {"fused_transpose_x", pat.Attr("fused_transpose_x")}, + {"fused_reshape_y", pat.Attr("fused_reshape_y")}, + {"fused_transpose_y", pat.Attr("fused_transpose_y")}, + {"fused_reshape_out", pat.Attr("fused_reshape_out")}, + {"fused_transpose_out", pat.Attr("fused_transpose_out")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"scale_x", pat.Attr("scale_x")}, + {"scale_y", pat.Attr("scale_y")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_out", pat.Attr("scale_out")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}}); + + matmul({&pat.Tensor("X"), &pat.Tensor("Y"), &pat.Tensor("residual")}, + {&pat.Tensor("Out")}); + + const auto &transpose = pat.Op(paddle::dialect::TransposeOp::name(), + {{"perm", pat.Attr("perm")}}); + pat.Tensor("transpose_out") = transpose(pat.Tensor("Out")); + + const auto &full_int_array = pat.Op(paddle::dialect::FullIntArrayOp::name(), + {{"value", pat.Attr("int_array")}}); + pat.Tensor("shape") = full_int_array(); + + const auto &reshape = pat.Op(paddle::dialect::ReshapeOp::name()); + reshape({&pat.Tensor("transpose_out"), &pat.Tensor("shape")}, + {&pat.Tensor("reshape_out"), &pat.Tensor("Xshape")}); + + 
pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto shape = match_ctx.Attr>("int_array"); + auto perm = match_ctx.Attr>("perm"); + const std::vector supported_axis{0, 2, 1, 3}; + if (perm != supported_axis) return false; + if (shape.size() != 3) return false; + if (std::count(shape.begin(), shape.end(), -1) > 1) return false; + + return true; + }); + + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + if (!(match_ctx.Attr>("fused_reshape_out").empty())) + return false; + return true; + }); + + paddle::drr::ResultPattern res = pat.ResultPattern(); + + std::unordered_map fused_attrs{ + {"trans_x", pat.Attr("transpose_x")}, + {"trans_y", pat.Attr("transpose_y")}, + {"matmul_alpha", pat.Attr("matmul_alpha")}, + {"fuse_activation", pat.Attr("fuse_activation")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"fused_output_scale", pat.Attr("fused_output_scale")}, + {"fused_reshape_x", pat.Attr("fused_reshape_x")}, + {"fused_transpose_x", pat.Attr("fused_transpose_x")}, + {"fused_reshape_y", pat.Attr("fused_reshape_y")}, + {"fused_transpose_y", pat.Attr("fused_transpose_y")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"scale_x", pat.Attr("scale_x")}, + {"scale_y", pat.Attr("scale_y")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_out", pat.Attr("scale_out")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}}; + + const auto &fused_reshape_attr = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> std::vector { + std::vector int_array_value; + auto shape = match_ctx.Attr>("int_array"); + for (auto i : shape) { + int_array_value.emplace_back(static_cast(i)); + } + return int_array_value; + }); + + fused_attrs.emplace("fused_reshape_out", fused_reshape_attr); + fused_attrs.emplace("fused_transpose_out", pat.Attr("perm")); + + const auto &fused_matmul = res.Op(fused_matmul_name_, fused_attrs); + + fused_matmul({&res.Tensor("X"), 
&res.Tensor("Y"), &res.Tensor("residual")}, + {&res.Tensor("reshape_out")}); + } +}; + +class MatmulTransposeReshapeFusePass : public pir::PatternRewritePass { + public: + MatmulTransposeReshapeFusePass() + : pir::PatternRewritePass("matmul_transpose_reshape_fuse_pass", 3) {} + + pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override { + pir::RewritePatternSet ps(context); + int benefit_idx = 1; + ps.Add(paddle::drr::Create( + context, + paddle::dialect::MatmulOp::name(), + paddle::onednn::dialect::FusedMatmulOp::name(), + benefit_idx++)); + + ps.Add(paddle::drr::Create( + context, + paddle::onednn::dialect::FusedMatmulOp::name(), + paddle::onednn::dialect::FusedMatmulOp::name(), + benefit_idx++)); + return ps; + } +}; + +} // namespace + +namespace pir { + +std::unique_ptr CreateMatmulTransposeReshapeFusePass() { + // pd_op.matmul + pd_op.transpose + pd_op.reshape -> onednn_op.fused_matmul + // pd_op.fused_matmul + pd_op.transpose + pd_op.reshape -> + // onednn_op.fused_matmul + return std::make_unique(); +} +} // namespace pir + +REGISTER_IR_PASS(matmul_transpose_reshape_fuse_pass, + MatmulTransposeReshapeFusePass); diff --git a/paddle/fluid/pir/transforms/onednn/matmul_transpose_reshape_fuse_pass.h b/paddle/fluid/pir/transforms/onednn/matmul_transpose_reshape_fuse_pass.h new file mode 100644 index 0000000000000..c56fa7ee62d7a --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/matmul_transpose_reshape_fuse_pass.h @@ -0,0 +1,26 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "paddle/pir/include/core/dll_decl.h" + +namespace pir { + +class Pass; + +IR_API std::unique_ptr CreateMatmulTransposeReshapeFusePass(); + +} // namespace pir diff --git a/paddle/fluid/pir/transforms/onednn/reshape_transpose_matmul_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/reshape_transpose_matmul_fuse_pass.cc index d317fc006300c..d249a2174ed88 100644 --- a/paddle/fluid/pir/transforms/onednn/reshape_transpose_matmul_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/onednn/reshape_transpose_matmul_fuse_pass.cc @@ -71,16 +71,6 @@ class ReshapeTransposeMatmulFusePattern : public paddle::drr::DrrPatternBase { {&pat.Tensor("Out")}); } - pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { - std::set bool_sets = {true, false}; - auto result_x = match_ctx.Attr("transpose_x"); - auto result_y = match_ctx.Attr("transpose_y"); - if (bool_sets.count(result_x) == 0 || bool_sets.count(result_y) == 0) { - return false; - } - return true; - }); - pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { auto shape = match_ctx.Attr>("int_array"); auto perm = match_ctx.Attr>("perm"); @@ -219,16 +209,6 @@ class ReshapeTransposeFusedMatmulFusePattern {&pat.Tensor("Out")}); } - pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { - std::set bool_sets = {true, false}; - auto result_x = match_ctx.Attr("transpose_x"); - auto result_y = match_ctx.Attr("transpose_y"); - if (bool_sets.count(result_x) == 0 || bool_sets.count(result_y) == 0) { - return false; - } - return true; - 
}); - pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { auto shape = match_ctx.Attr>("int_array"); auto perm = match_ctx.Attr>("perm"); diff --git a/paddle/fluid/pir/transforms/onednn/scale_matmul_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/scale_matmul_fuse_pass.cc new file mode 100644 index 0000000000000..07a26a6beee34 --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/scale_matmul_fuse_pass.cc @@ -0,0 +1,296 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/pir/transforms/onednn/scale_matmul_fuse_pass.h" + +#include "paddle/fluid/pir/dialect/operator/ir/onednn_op.h" +#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" +#include "paddle/fluid/pir/drr/include/drr_pattern_base.h" + +#include "paddle/pir/include/pass/pass.h" +#include "paddle/pir/include/pass/pass_registry.h" + +namespace { +class ScaleMatmulFusePattern : public paddle::drr::DrrPatternBase { + private: + std::string matmul_name_; + std::string fused_matmul_name_; + uint32_t benefit_; + bool as_x_; // decide if the output of scale is for input_x of matmul + + public: + ScaleMatmulFusePattern(const std::string &matmul_name, + const std::string &fused_matmul_name, + uint32_t benefit, + bool as_x) + : matmul_name_(matmul_name), + fused_matmul_name_(fused_matmul_name), + benefit_(benefit), + as_x_(as_x) {} + + std::string name() const override { return "ScaleMatmulFusePattern"; } + + uint32_t benefit() const override { return benefit_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + + const auto &full = pat.Op(paddle::dialect::FullOp::name(), + {{"value", pat.Attr("scale_")}}); + pat.Tensor("scale") = full(); + + const auto &scale = + pat.Op(paddle::dialect::ScaleOp::name(), + {{"bias", pat.Attr("bias")}, + {"bias_after_scale", pat.Attr("bias_after_scale")}}); + scale({&pat.Tensor("scale_in"), &pat.Tensor("scale")}, + {&pat.Tensor("scale_out")}); + + const auto &matmul = pat.Op(matmul_name_, + {{"transpose_x", pat.Attr("transpose_x")}, + {"transpose_y", pat.Attr("transpose_y")}}); + if (as_x_) { + matmul({&pat.Tensor("scale_out"), &pat.Tensor("other")}, + {&pat.Tensor("Out")}); + } else { + matmul({&pat.Tensor("other"), &pat.Tensor("scale_out")}, + {&pat.Tensor("Out")}); + } + + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto scale = match_ctx.Attr("scale_"); + auto bias = match_ctx.Attr("bias"); + // conditions 
align with fluid pass + if (bias != 0.0f) return false; + if (scale <= 0.0f) return false; + return true; + }); + + paddle::drr::ResultPattern res = pat.ResultPattern(); + + std::unordered_map fused_attrs{ + {"trans_x", pat.Attr("transpose_x")}, + {"trans_y", pat.Attr("transpose_y")}, + {"fuse_activation", res.StrAttr("")}, + {"fuse_alpha", res.Float32Attr(0.0f)}, + {"fuse_beta", res.Float32Attr(0.0f)}, + {"fused_reshape_x", res.VectorInt32Attr({})}, + {"fused_transpose_x", res.VectorInt32Attr({})}, + {"fused_reshape_y", res.VectorInt32Attr({})}, + {"fused_transpose_y", res.VectorInt32Attr({})}, + {"fused_output_scale", res.Float32Attr(1.0f)}, + {"fused_reshape_out", res.VectorInt32Attr({})}, + {"fused_transpose_out", res.VectorInt32Attr({})}, + {"mkldnn_data_type", res.StrAttr("float32")}, + {"scale_x", res.Float32Attr(1.0f)}, + {"scale_y", res.Float32Attr(1.0f)}, + {"scale_in_eltwise", res.Float32Attr(0.0f)}, + {"scale_out", res.Float32Attr(1.0f)}, + {"force_fp32_output", res.BoolAttr(false)}}; + + const auto &matmul_alpha_attr = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> float { + auto scale = match_ctx.Attr("scale_"); + return scale; + }); + + fused_attrs.emplace("matmul_alpha", matmul_alpha_attr); + + const auto &fused_matmul = res.Op(fused_matmul_name_, fused_attrs); + + if (as_x_) { + fused_matmul({&res.Tensor("scale_in"), + &res.Tensor("other"), + &res.InputNoneTensor()}, + {&res.Tensor("Out")}); + } else { + fused_matmul({&res.Tensor("other"), + &res.Tensor("scale_in"), + &res.InputNoneTensor()}, + {&res.Tensor("Out")}); + } + } +}; + +class ScaleFusedMatmulFusePattern : public paddle::drr::DrrPatternBase { + private: + std::string matmul_name_; + std::string fused_matmul_name_; + uint32_t benefit_; + bool as_x_; // decide if the output of transpose is for input_x of matmul + + public: + ScaleFusedMatmulFusePattern(const std::string &matmul_name, + const std::string &fused_matmul_name, + uint32_t benefit, + bool as_x) + : 
matmul_name_(matmul_name), + fused_matmul_name_(fused_matmul_name), + benefit_(benefit), + as_x_(as_x) {} + + std::string name() const override { return "ScaleFusedMatmulFusePattern"; } + + uint32_t benefit() const override { return benefit_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + + const auto &full = pat.Op(paddle::dialect::FullOp::name(), + {{"value", pat.Attr("scale_")}}); + pat.Tensor("scale") = full(); + + const auto &scale = + pat.Op(paddle::dialect::ScaleOp::name(), + {{"bias", pat.Attr("bias")}, + {"bias_after_scale", pat.Attr("bias_after_scale")}}); + scale({&pat.Tensor("scale_in"), &pat.Tensor("scale")}, + {&pat.Tensor("scale_out")}); + + const auto &matmul = + pat.Op(matmul_name_, + {{"trans_x", pat.Attr("transpose_x")}, + {"trans_y", pat.Attr("transpose_y")}, + {"matmul_alpha", pat.Attr("matmul_alpha")}, + {"fuse_activation", pat.Attr("fuse_activation")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"fused_output_scale", pat.Attr("fused_output_scale")}, + {"fused_reshape_x", pat.Attr("fused_reshape_x")}, + {"fused_transpose_x", pat.Attr("fused_transpose_x")}, + {"fused_reshape_y", pat.Attr("fused_reshape_y")}, + {"fused_transpose_y", pat.Attr("fused_transpose_y")}, + {"fused_reshape_out", pat.Attr("fused_reshape_out")}, + {"fused_transpose_out", pat.Attr("fused_transpose_out")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"scale_x", pat.Attr("scale_x")}, + {"scale_y", pat.Attr("scale_y")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_out", pat.Attr("scale_out")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}}); + if (as_x_) { + matmul({&pat.Tensor("scale_out"), + &pat.Tensor("other"), + &pat.Tensor("residual")}, + {&pat.Tensor("Out")}); + } else { + matmul({&pat.Tensor("other"), + &pat.Tensor("scale_out"), + &pat.Tensor("residual")}, + {&pat.Tensor("Out")}); + } + + 
pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto matmul_alpha = match_ctx.Attr("matmul_alpha"); + auto scale = match_ctx.Attr("scale_"); + auto bias = match_ctx.Attr("bias"); + // conditions align with fluid pass + if (matmul_alpha == 0.0f) return false; + if (bias != 0.0f) return false; + if (scale <= 0.0f) return false; + return true; + }); + + paddle::drr::ResultPattern res = pat.ResultPattern(); + + std::unordered_map fused_attrs{ + {"trans_x", pat.Attr("transpose_x")}, + {"trans_y", pat.Attr("transpose_y")}, + {"fuse_activation", pat.Attr("fuse_activation")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"fused_reshape_x", pat.Attr("fused_reshape_x")}, + {"fused_transpose_x", pat.Attr("fused_transpose_x")}, + {"fused_reshape_y", pat.Attr("fused_reshape_y")}, + {"fused_transpose_y", pat.Attr("fused_transpose_y")}, + {"fused_output_scale", pat.Attr("fused_output_scale")}, + {"fused_reshape_out", pat.Attr("fused_reshape_out")}, + {"fused_transpose_out", pat.Attr("fused_transpose_out")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"scale_x", pat.Attr("scale_x")}, + {"scale_y", pat.Attr("scale_y")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_out", pat.Attr("scale_out")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}}; + + const auto &matmul_alpha_attr = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> float { + auto scale = match_ctx.Attr("scale_"); + auto matmul_alpha = match_ctx.Attr("matmul_alpha"); + return scale * matmul_alpha; + }); + + fused_attrs.emplace("matmul_alpha", matmul_alpha_attr); + + const auto &fused_matmul = res.Op(fused_matmul_name_, fused_attrs); + + if (as_x_) { + fused_matmul({&res.Tensor("scale_in"), + &res.Tensor("other"), + &res.Tensor("residual")}, + {&res.Tensor("Out")}); + } else { + fused_matmul({&res.Tensor("other"), + &res.Tensor("scale_in"), + &res.Tensor("residual")}, + {&res.Tensor("Out")}); + } + } 
+}; + +class ScaleMatmulFusePass : public pir::PatternRewritePass { + public: + ScaleMatmulFusePass() + : pir::PatternRewritePass("reshape_transpose_matmul_fuse_pass", 3) {} + + pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override { + pir::RewritePatternSet ps(context); + std::vector bool_set = {false, true}; + int benefit_idx = 5; + for (auto as_x : bool_set) { + ps.Add(paddle::drr::Create( + context, + paddle::dialect::MatmulOp::name(), + paddle::onednn::dialect::FusedMatmulOp::name(), + benefit_idx, + as_x)); + benefit_idx--; + } + + for (auto as_x : bool_set) { + ps.Add(paddle::drr::Create( + context, + paddle::onednn::dialect::FusedMatmulOp::name(), + paddle::onednn::dialect::FusedMatmulOp::name(), + benefit_idx, + as_x)); + benefit_idx--; + } + return ps; + } +}; + +} // namespace + +namespace pir { + +std::unique_ptr CreateScaleMatmulFusePass() { + // pd_op.reshape + pd_op.transpose + pd_op.matmul -> onednn_op.fused_matmul + // pd_op.reshape + pd_op.transpose + pd_op.fused_matmul -> + // onednn_op.fused_matmul + return std::make_unique(); +} +} // namespace pir + +REGISTER_IR_PASS(scale_matmul_fuse_pass, ScaleMatmulFusePass); diff --git a/paddle/fluid/pir/transforms/onednn/scale_matmul_fuse_pass.h b/paddle/fluid/pir/transforms/onednn/scale_matmul_fuse_pass.h new file mode 100644 index 0000000000000..2ba8a3787e5dc --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/scale_matmul_fuse_pass.h @@ -0,0 +1,26 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "paddle/pir/include/core/dll_decl.h" + +namespace pir { + +class Pass; + +IR_API std::unique_ptr CreateScaleMatmulFusePass(); + +} // namespace pir diff --git a/paddle/fluid/pir/transforms/onednn/squeeze_transpose_onednn_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/squeeze_transpose_onednn_fuse_pass.cc new file mode 100644 index 0000000000000..e1f7250de2932 --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/squeeze_transpose_onednn_fuse_pass.cc @@ -0,0 +1,109 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/pir/transforms/onednn/squeeze_transpose_onednn_fuse_pass.h" + +#include "paddle/fluid/pir/dialect/operator/ir/onednn_op.h" +#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" +#include "paddle/fluid/pir/drr/include/drr_pattern_base.h" + +#include "paddle/pir/include/pass/pass.h" +#include "paddle/pir/include/pass/pass_registry.h" + +namespace { + +class SqueezeTransposePattern : public paddle::drr::DrrPatternBase { + public: + SqueezeTransposePattern() {} + + std::string name() const override { return "SqueezeTransposePattern"; } + + uint32_t benefit() const override { return 2; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + + const auto &squeeze = pat.Op(paddle::dialect::SqueezeOp::name()); + const auto &full_1 = pat.Op(paddle::dialect::FullIntArrayOp::name(), + {{"value", pat.Attr("full_1_value")}}); + + squeeze({&pat.Tensor("x"), &full_1()}, + {&pat.Tensor("squeeze_out"), &pat.Tensor("xshape")}); + + const auto &transpose = pat.Op(paddle::dialect::TransposeOp::name(), + {{"perm", pat.Attr("perm")}}); + + transpose({&pat.Tensor("squeeze_out")}, {&pat.Tensor("transpose_op_out")}); + + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto axis = match_ctx.Attr>("full_1_value"); + auto perm = match_ctx.Attr>("perm"); + if (perm.size() <= 0) return false; + if (axis.size() <= 0) return false; + return true; + }); + + paddle::drr::ResultPattern res = pat.ResultPattern(); + + const auto &fused_reshape_attr = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> std::vector { + std::vector int_array_value; + auto shape = match_ctx.Attr>("full_1_value"); + for (auto i : shape) { + int_array_value.emplace_back(static_cast(i)); + } + return int_array_value; + }); + + const auto &fused_transpose = + res.Op(paddle::onednn::dialect::FusedTransposeOp::name(), + {{ + {"axis", pat.Attr("perm")}, + {"fused_squeeze2_axes", 
fused_reshape_attr}, + {"fused_unsqueeze2_axes", res.VectorInt32Attr({})}, + {"fused_reshape2_shape", res.VectorInt32Attr({})}, + {"scale", res.Float32Attr(1.0f)}, + {"shift", res.Float32Attr(0.0f)}, + {"output_data_type", res.StrAttr("fp32")}, + {"data_format", res.StrAttr("AnyLayout")}, + {"mkldnn_data_type", res.StrAttr("float32")}, + }}); + fused_transpose({&res.Tensor("x")}, {&res.Tensor("transpose_op_out")}); + } +}; + +class SqueezeTransposePass : public pir::PatternRewritePass { + public: + SqueezeTransposePass() + : pir::PatternRewritePass("squeeze_transpose_onednn_fuse_pass", 3) {} + + pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override { + pir::RewritePatternSet ps(context); + ps.Add(paddle::drr::Create(context)); + return ps; + } +}; + +} // namespace + +namespace pir { + +std::unique_ptr CreateSqueezeTransposeOneDNNPass() { + // pd_op.squeeze + transpose2 -> onednn_op.fused_transpose + return std::make_unique(); +} + +} // namespace pir + +REGISTER_IR_PASS(squeeze_transpose_onednn_fuse_pass, SqueezeTransposePass); diff --git a/paddle/fluid/pir/transforms/onednn/squeeze_transpose_onednn_fuse_pass.h b/paddle/fluid/pir/transforms/onednn/squeeze_transpose_onednn_fuse_pass.h new file mode 100644 index 0000000000000..fce3e0f6e8a80 --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/squeeze_transpose_onednn_fuse_pass.h @@ -0,0 +1,26 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "paddle/pir/include/core/dll_decl.h" + +namespace pir { + +class Pass; + +IR_API std::unique_ptr CreateSqueezeTransposeOneDNNPass(); + +} // namespace pir diff --git a/paddle/fluid/pir/transforms/passes.h b/paddle/fluid/pir/transforms/passes.h index 2423bfbc8efc2..20e0e5ee01a5b 100644 --- a/paddle/fluid/pir/transforms/passes.h +++ b/paddle/fluid/pir/transforms/passes.h @@ -38,18 +38,26 @@ USE_PIR_PASS(conv2d_add_act_fuse_pass); USE_PIR_PASS(embedding_eltwise_layernorm_fuse_pass); USE_PIR_PASS(add_norm_fuse_pass); USE_PIR_PASS(fused_dot_product_attention_pass); +USE_PIR_PASS(fused_flash_attn_pass); #ifdef PADDLE_WITH_DNNL +USE_PIR_PASS(depthwise_conv_onednn_pass); +USE_PIR_PASS(squeeze_transpose_onednn_fuse_pass); USE_PIR_PASS(batch_norm_act_fuse_pass); USE_PIR_PASS(conv2d_bias_fuse_pass); USE_PIR_PASS(conv2d_transpose_bias_fuse_pass); USE_PIR_PASS(conv3d_bias_fuse_pass); +USE_PIR_PASS(scale_matmul_fuse_pass); USE_PIR_PASS(reshape_transpose_matmul_fuse_pass); +USE_PIR_PASS(matmul_transpose_reshape_fuse_pass); USE_PIR_PASS(matmul_elementwise_add_fuse_pass); USE_PIR_PASS(matmul_activation_fuse_pass); -USE_PIR_PASS(conv_elementwise_add_mkldnn_fuse_pass); +USE_PIR_PASS(conv_elementwise_add_onednn_fuse_pass); +USE_PIR_PASS(conv_activation_onednn_fuse_pass); +USE_PIR_PASS(conv_concat_activation_onednn_fuse_pass); #endif #ifdef PADDLE_WITH_XPU USE_PIR_PASS(add_layernorm_xpu_fuse_pass); +USE_PIR_PASS(group_norm_silu_xpu_fuse_pass); #endif diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc index 182aa009a020c..43a3e2237036b 100644 --- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc +++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc @@ -754,7 +754,7 @@ static phi::Backend GetKernelBackendByYaml( auto& backend_info = 
op_info_parser->OpRuntimeInfo().kernel_key_backend; phi::Backend kernel_backend = phi::Backend::UNDEFINED; - for (auto slot_name : backend_info) { + for (const auto& slot_name : backend_info) { auto& input_map = op_info_parser->InputName2Id(); if (input_map.count(slot_name)) { diff --git a/paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.cc b/paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.cc new file mode 100644 index 0000000000000..648df644bed5d --- /dev/null +++ b/paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.cc @@ -0,0 +1,99 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.h" + +#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" +#include "paddle/fluid/pir/drr/include/drr_pattern_base.h" +#include "paddle/fluid/pir/utils/general_functions.h" + +#include "paddle/pir/include/pass/pass.h" +#include "paddle/pir/include/pass/pass_registry.h" + +/* +fuse gn + activation block in to group_norm_silu op +For example: +graph: + X + Scale | Bias + \ | / + group norm + / | \ + / | \ + variance | mean + | + silu + | + output +------------------------------------------------------ +After the pass is applied: + X + Scale | Bias + \ | / + group_norm_silu + | + Out +*/ + +namespace { + +class GroupNormSiluPattern : public paddle::drr::DrrPatternBase { + public: + std::string name() const override { return "GroupNormSiluPattern"; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + const auto &groupnorm = pat.Op( + paddle::dialect::GroupNormOp::name(), + {{"epsilon", pat.Attr("epsilon")}, {"groups", pat.Attr("groups")}}); + + const auto &silu = pat.Op(paddle::dialect::SiluOp::name()); + + groupnorm({&pat.Tensor("X"), &pat.Tensor("Bias"), &pat.Tensor("Scale")}, + {&pat.Tensor("Y"), &pat.Tensor("Mean"), &pat.Tensor("Variance")}); + silu({&pat.Tensor("Y")}, {&pat.Tensor("Out")}); + + paddle::drr::ResultPattern res = pat.ResultPattern(); + + const auto &group_norm_silu_xpu = res.Op( + paddle::dialect::GroupNormSiluXpuOp::name(), + {{{"epsilon", pat.Attr("epsilon")}, {"groups", pat.Attr("groups")}}}); + group_norm_silu_xpu( + {&res.Tensor("X"), &res.Tensor("Bias"), &res.Tensor("Scale")}, + {&res.Tensor("Out")}); + } +}; + +class GroupNormSiluXpuFusePass : public pir::PatternRewritePass { + public: + GroupNormSiluXpuFusePass() + : pir::PatternRewritePass("group_norm_silu_xpu_fuse_pass", 2) {} + + pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override { + pir::RewritePatternSet 
ps(context); + ps.Add(paddle::drr::Create(context)); + return ps; + } +}; + +} // namespace + +namespace pir { +std::unique_ptr CreateGroupNormSiluXpuFusePass() { + return std::make_unique(); +} + +} // namespace pir + +REGISTER_IR_PASS(group_norm_silu_xpu_fuse_pass, GroupNormSiluXpuFusePass); diff --git a/paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.h b/paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.h new file mode 100644 index 0000000000000..665c7dcb03f16 --- /dev/null +++ b/paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.h @@ -0,0 +1,26 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include "paddle/pir/include/core/dll_decl.h" + +namespace pir { + +class Pass; + +IR_API std::unique_ptr CreateGroupNormSiluXpuFusePass(); + +} // namespace pir diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 2cabc79bb3844..99a6606d1183a 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -41,10 +41,10 @@ cc_test( SRCS place_test.cc DEPS place glog phi common) -if(WITH_MKLDNN) - set(MKLDNN_CTX_DEPS mkldnn) +if(WITH_ONEDNN) + set(ONEDNN_CTX_DEPS onednn) else() - set(MKLDNN_CTX_DEPS) + set(ONEDNN_CTX_DEPS) endif() add_subdirectory(device) @@ -126,7 +126,7 @@ cc_library( framework_proto ${IPU_CTX_DEPS} ${GPU_CTX_DEPS} - ${MKLDNN_CTX_DEPS} + ${ONEDNN_CTX_DEPS} ${dgc_deps} dlpack phi diff --git a/paddle/fluid/platform/device/gpu/gpu_info.cc b/paddle/fluid/platform/device/gpu/gpu_info.cc index 36189cc7e4c90..73704b04cf90b 100644 --- a/paddle/fluid/platform/device/gpu/gpu_info.cc +++ b/paddle/fluid/platform/device/gpu/gpu_info.cc @@ -217,6 +217,7 @@ class RecordedGpuMallocHelper { CUDADeviceGuard guard(dev_id_); gpuError_t result; #ifdef PADDLE_WITH_HIP + phi::backends::gpu::CUDAGraphCaptureModeGuard capture_mode_guard; if (UNLIKELY(malloc_managed_memory)) { result = hipMallocManaged(ptr, size); } else { diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/onednn_helper.h similarity index 99% rename from paddle/fluid/platform/mkldnn_helper.h rename to paddle/fluid/platform/onednn_helper.h index 6132aa9292e56..145f42f669d9d 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/onednn_helper.h @@ -52,7 +52,7 @@ inline void DontClearMKLDNNCache(const platform::Place& place) { } } -// If MKLDNN build and CPU place then register suffix in DeviceContext +// If OneDNN build and CPU place then register suffix in DeviceContext inline void AttachPointerHashToMKLDNNKey(void* ptr, const platform::Place& place) { if 
(platform::is_cpu_place(place)) { diff --git a/paddle/fluid/platform/mkldnn_op_list.h b/paddle/fluid/platform/onednn_op_list.h similarity index 100% rename from paddle/fluid/platform/mkldnn_op_list.h rename to paddle/fluid/platform/onednn_op_list.h diff --git a/paddle/fluid/prim/api/composite_backward/composite_backward_api.h b/paddle/fluid/prim/api/composite_backward/composite_backward_api.h index 169d41d9763e5..4ef0cfee6e283 100644 --- a/paddle/fluid/prim/api/composite_backward/composite_backward_api.h +++ b/paddle/fluid/prim/api/composite_backward/composite_backward_api.h @@ -459,6 +459,15 @@ void sqrt_grad(const Tensor& out, const Tensor& out_grad, Tensor* x_grad) { } } +template +void rsqrt_grad(const Tensor& out, const Tensor& out_grad, Tensor* x_grad) { + if (x_grad) { + // This calculation is important for resnet. + auto x_grad_tmp = -0.5 * out * out * out * out_grad; + set_output(x_grad_tmp, x_grad); + } +} + template void floor_grad(const Tensor& out_grad, Tensor* x_grad) { if (x_grad) { @@ -1123,6 +1132,58 @@ void max_grad(const Tensor& x, set_output(x_grad_tmp, x_grad); } +template +void min_grad(const Tensor& x, + const Tensor& out, + const Tensor& out_grad, + const IntArray& axis, + bool keepdim, + bool reduce_all, + Tensor* x_grad) { + if (!x_grad) { + return; + } + auto zero_tensor = full(common::vectorize(x.dims()), 0.0, x.dtype()); + std::vector x_dim = common::vectorize(x.dims()); + int64_t axis_size = axis.size(); + int64_t x_dim_size = x_dim.size(); + reduce_all = false; + if (reduce_all || axis_size == 0 || axis_size == x_dim_size) { + reduce_all = true; + } else { + reduce_all = false; + } + auto x_grad_tmp = Tensor(); + if (x_dim_size == 0 || x_dim_size == 1 || keepdim) { + auto out_grad_tmp = out_grad.expand(IntArray(x_dim)); + auto out_tmp = out.expand(IntArray(x_dim)); + auto mask = equal(x, out_tmp); + x_grad_tmp = where(mask, out_grad_tmp, zero_tensor); + } else { + auto axis_ = std::vector(); + if (reduce_all) { + for (int64_t i = 0; i 
< x_dim_size; i++) { + axis_.push_back(i); + } + } else { + axis_ = axis.GetData(); + for (int64_t i = 0; i < axis_size; i++) { + if (axis[i] < 0) { + axis_[i] = axis[i] + x_dim_size; + } + } + } + auto out_grad_shape = get_unsqueeze_dims(out_grad, axis_); + auto out_grad_ = reshape(out_grad, out_grad_shape); + auto out_ = reshape(out, out_grad_shape); + auto out_grad_tmp = out_grad_.expand(IntArray(x_dim)); + auto out_tmp = out_.expand(IntArray(x_dim)); + auto mask = equal(x, out_tmp); + x_grad_tmp = where(mask, out_grad_tmp, zero_tensor); + } + set_output(x_grad_tmp, x_grad); +} + template void assign_grad(const Tensor& out_grad, Tensor* x_grad) { if (x_grad) { diff --git a/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h b/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h index 7e7ccfaf170b3..67feb640c9f7a 100644 --- a/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h +++ b/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h @@ -784,5 +784,54 @@ void subtract_double_grad(const Tensor& y, } } +template +void exp_double_grad(const Tensor& out, + const Tensor& grad_out, + const Tensor& grad_x_grad, + Tensor* out_grad, + Tensor* grad_out_grad) { + // dout = dout_old * ddx + if (out_grad) { + auto out_grad_tmp = grad_out * grad_x_grad; + set_output(out_grad_tmp, out_grad); + } + + // ddout = out * ddx + if (grad_out_grad) { + auto grad_out_grad_tmp = out * grad_x_grad; + set_output(grad_out_grad_tmp, grad_out_grad); + } +} + +template +void log_double_grad(const Tensor& x, + const Tensor& grad_out, + const Tensor& grad_x_grad, + Tensor* x_grad, + Tensor* grad_out_grad) { + // dx = -dout/x^2 * ddx + if (x_grad) { + auto x_grad_tmp = -grad_out / (x * x) * grad_x_grad; + set_output(x_grad_tmp, x_grad); + } + + // ddout = ddx / x + if (grad_out_grad) { + auto grad_out_grad_tmp = grad_x_grad / x; + set_output(grad_out_grad_tmp, grad_out_grad); + } +} + +template +void 
abs_triple_grad(const Tensor& x, + const Tensor& grad_out_grad_grad, + Tensor* grad_grad_x_grad) { + // dddx = sign(x) * dddout + if (grad_grad_x_grad) { + auto grad_grad_x_grad_tmp = sign(x) * grad_out_grad_grad; + set_output(grad_grad_x_grad_tmp, grad_grad_x_grad); + } +} + } // namespace prim } // namespace paddle diff --git a/paddle/fluid/primitive/backend/manual/manual_static_prim_backend.cc b/paddle/fluid/primitive/backend/manual/manual_static_prim_backend.cc index a79e929a6e5cc..a479379cc6ab4 100644 --- a/paddle/fluid/primitive/backend/manual/manual_static_prim_backend.cc +++ b/paddle/fluid/primitive/backend/manual/manual_static_prim_backend.cc @@ -43,7 +43,7 @@ Tensor full_with_tensor(const Tensor& shape, std::static_pointer_cast(shape.impl())->value(); pir::Value value_res = paddle::dialect::full( std::vector{}, value.to(), dtype, place); - auto op_res = paddle::dialect::full_with_tensor(shape_res, value_res, dtype); + auto op_res = paddle::dialect::full_with_tensor(value_res, shape_res, dtype); Tensor out(std::make_shared(op_res)); return out; } diff --git a/paddle/fluid/primitive/base/primitive_ops.h b/paddle/fluid/primitive/base/primitive_ops.h index aa52907f8f7fe..4aafd7693ae75 100644 --- a/paddle/fluid/primitive/base/primitive_ops.h +++ b/paddle/fluid/primitive/base/primitive_ops.h @@ -46,7 +46,6 @@ const std::set& GetPrimitiveOpNames() { "pd_op.assign_value", "pd_op.concat", "pd_op.elementwise_pow", - "pd_op.rsqrt", "pd_op.floor", "pd_op.gather", "pd_op.gather_nd", @@ -91,6 +90,10 @@ const std::set& GetPrimitiveOpNames() { "pd_op.full_with_tensor", "pd_op.if", "pd_op.while", + /* Considering better performance, such ops are set as primitive ops + temporarily*/ + "pd_op.rsqrt", + "pd_op.sqrt", /* basic ops by PIR*/ "builtin.combine", "builtin.slice", diff --git a/paddle/fluid/primitive/codegen/decomp_gen.py b/paddle/fluid/primitive/codegen/decomp_gen.py index 95b40f9f87506..bfc157d24c3a6 100644 --- a/paddle/fluid/primitive/codegen/decomp_gen.py +++ 
b/paddle/fluid/primitive/codegen/decomp_gen.py @@ -156,7 +156,7 @@ def gen( Args: prim_path (pathlib.Path): The YAML file path of the primitive API. - fwd_path (pathlib.Path): The YAML file path of the forwad API. + fwd_path (pathlib.Path): The YAML file path of the forward API. rev_path (pathlib.Path): The YAML file path of the backward API. compat_path: (pathlib.Path): The YAML file path of the ops compat. fwd_pd_op_path (pathlib.Path): The YAML file path of the ir forward API. diff --git a/paddle/fluid/primitive/codegen/gen.py b/paddle/fluid/primitive/codegen/gen.py index e4d0e50e60877..dd75859e16b74 100644 --- a/paddle/fluid/primitive/codegen/gen.py +++ b/paddle/fluid/primitive/codegen/gen.py @@ -63,6 +63,7 @@ 'exp_grad', 'floor_grad', 'log_grad', + 'rsqrt_grad', 'sin_grad', 'cos_grad', 'tanh_grad', @@ -117,6 +118,9 @@ 'relu_grad', 'sigmoid_grad', 'silu_grad', + 'exp_grad', + 'log_grad', + 'abs_double_grad', 'softmax_grad', 'sqrt_grad', ] # custom vjp list of composite op diff --git a/paddle/fluid/primitive/composite/composite.h b/paddle/fluid/primitive/composite/composite.h index 63cec678eb8ae..ca602dfb2ea89 100644 --- a/paddle/fluid/primitive/composite/composite.h +++ b/paddle/fluid/primitive/composite/composite.h @@ -127,6 +127,59 @@ static bool valid_type(const DataType& dtype) { } } +template +Tensor p_norm_decomp(const Tensor& x, + const float& porder = 2.0, + const int& axis = -1, + const float epsilon = 1.0e-12f, + const bool& keepdim = false, + const bool& asvector = false) { + auto org_dtype = x.dtype(); + auto x_tmp = x; + + bool need_cast = is_half_dtype(org_dtype); + if (need_cast) { + x_tmp = cast(x, DataType::FLOAT32); + } + + Tensor res; + if (porder == 0.0) { + // 0-norm + auto zero = full(empty_shape, 0, x_tmp.dtype()); + auto none_zero = not_equal(x_tmp, zero); + res = cast(none_zero, x_tmp.dtype()); + res = sum(res, {axis}, x_tmp.dtype(), keepdim); + } else if (porder == 1.0) { + // 1-norm + res = abs(x_tmp); + res = sum(res, {axis}, 
x_tmp.dtype(), keepdim); + } else if (porder == 2.0) { + // 2-norm + res = sqrt(sum(x_tmp * x_tmp, {axis}, x_tmp.dtype(), keepdim)); + } else if (porder == INFINITY) { + // +INF-norm + res = abs(x_tmp); + res = max(x_tmp, {axis}, keepdim); + } else if (porder == -INFINITY) { + // -INF-norm + res = abs(x_tmp); + res = min(x_tmp, {axis}, keepdim); + } else { + // vanilla p-norm + auto porder_tensor = full(empty_shape, porder, x_tmp.dtype()); + auto inv_porder_tensor = full(empty_shape, 1 / porder, x_tmp.dtype()); + res = elementwise_pow(x_tmp, porder_tensor); + res = sum(res, {axis}, x_tmp.dtype(), keepdim); + res = elementwise_pow(res, inv_porder_tensor); + } + + if (need_cast) { + return cast(res, org_dtype); + } else { + return res; + } +} + template Tensor pow_decomp(const Tensor& x, const paddle::Scalar& y) { auto org_dtype = x.dtype(); @@ -592,24 +645,6 @@ std::tuple dropout_decomp( } } -template -Tensor sqrt_decomp(const Tensor& x) { - auto org_dtype = x.dtype(); - Tensor x_cast = x; - - bool need_cast = is_half_dtype(org_dtype); - if (need_cast) { - x_cast = cast(x, DataType::FLOAT32); - } - - auto ans = 1.0 / rsqrt(x_cast); - if (need_cast) { - return cast(ans, org_dtype); - } else { - return ans; - } -} - template Tensor gelu_decomp(const Tensor& x, bool approximate) { const double PM_2_SQRTPI = 1.12837916709551257390; /* 2/sqrt(pi) */ diff --git a/paddle/fluid/primitive/primitive.yaml b/paddle/fluid/primitive/primitive.yaml index 58c3ac09b782a..f5e99706faf97 100644 --- a/paddle/fluid/primitive/primitive.yaml +++ b/paddle/fluid/primitive/primitive.yaml @@ -4,6 +4,7 @@ - divide - elementwise_pow - rsqrt +- sqrt - sin - sinh - asin diff --git a/paddle/fluid/primitive/rule/vjp/details.h b/paddle/fluid/primitive/rule/vjp/details.h index 626e0e9d78b26..bfd6f41dcbeb7 100644 --- a/paddle/fluid/primitive/rule/vjp/details.h +++ b/paddle/fluid/primitive/rule/vjp/details.h @@ -69,7 +69,7 @@ void divide_grad(const Tensor& x, Tensor* dy) { if (dy) { // dy = -(x/y^2) * 
dout - auto dy_res = -(x / y.pow(2.0)) * out_grad; + auto dy_res = -(x / (y * y)) * out_grad; if (out_grad.dims() != y.dims()) { phi::DDim reduce_dim = get_reduce_dims_from_out(out_grad.dims(), y.dims()); @@ -566,9 +566,7 @@ void layer_norm_grad(const Tensor& x, auto x_sub_mean = x_cast - mean_; // M,N auto tmp = (1.0 / (variance_ + epsilon)); // M,1 - // auto sqrt_var_1 = sqrt(tmp); // M,1 - auto sqrt_var_1 = elementwise_pow( - tmp, full(common::vectorize(tmp.dims()), 0.5, tmp.dtype())); + auto sqrt_var_1 = sqrt(tmp); // M,1 auto x_sub_mean_mul_sqrt_var_1 = x_sub_mean * sqrt_var_1; if (x_grad) { @@ -721,6 +719,15 @@ void sqrt_grad(const Tensor& out, const Tensor& out_grad, Tensor* x_grad) { } } +template +void rsqrt_grad(const Tensor& out, const Tensor& out_grad, Tensor* x_grad) { + if (x_grad) { + // This calculation is important for resnet. + auto x_grad_tmp = -0.5 * out * out * out * out_grad; + set_output(x_grad_tmp, x_grad); + } +} + template void silu_grad(const Tensor& x, const Tensor& out, @@ -1254,7 +1261,7 @@ void batch_norm_grad(const Tensor& x, auto eps = full(common::vectorize(run_var.dims()), epsilon, run_var.dtype()); mean_data = run_mean; - rsqrt_var = (run_var + eps).pow(-0.5); + rsqrt_var = rsqrt(run_var + eps); } else { mean_data = saved_mean; rsqrt_var = saved_variance; @@ -1596,7 +1603,7 @@ void group_norm_grad(const Tensor& x, p1 = (reshape(inv_std, {N, groups, 1})).expand(shape_group); } - auto p2 = (d2 * mean - d1) * (inv_std_mul_s * inv_std * inv_std); + auto p2 = (d2 * mean - d1) * (inv_std_mul_s / var_eps); auto p3 = -p2 * mean - d2 * inv_std_mul_s; std::vector first_shape; std::vector second_shape; diff --git a/paddle/fluid/primitive/utils/utils.h b/paddle/fluid/primitive/utils/utils.h index c67886bc2ed2c..42f1533db723e 100644 --- a/paddle/fluid/primitive/utils/utils.h +++ b/paddle/fluid/primitive/utils/utils.h @@ -138,7 +138,7 @@ static phi::DDim get_reduce_dims_from_out(const phi::DDim& dout_dims, result.push_back(i); } for (int i = 
0; i < in_dims.size(); ++i) { - if (in_dims[i] == 1 && dout_dims[i] != 1) { + if (in_dims[i] == 1 && dout_dims[i + bat] != 1) { result.push_back(i + bat); } else { PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index ecf95eb234972..0a32e0ea8f9ff 100755 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -41,6 +41,7 @@ set(PYBIND_DEPS op_dialect_vjp program_translator pir_transforms + pir_save_load new_profiler fluid_jit prim_utils @@ -399,11 +400,11 @@ if(WITH_PYTHON) list(APPEND OP_IMPL_DEPS ${op_impl_path}/openblas.dll) list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/openblas.dll) endif() - if(WITH_MKLDNN) + if(WITH_ONEDNN) add_custom_command( OUTPUT ${op_impl_path}/mkldnn.dll COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_SHARED_LIB} ${op_impl_path} - DEPENDS mkldnn) + DEPENDS onednn) list(APPEND OP_IMPL_DEPS ${op_impl_path}/mkldnn.dll) list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/mkldnn.dll) endif() @@ -474,12 +475,12 @@ if(WITH_PYTHON) list(APPEND OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/libiomp5.so) list(APPEND EAGER_OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/libiomp5.so) endif() - if(WITH_MKLDNN) + if(WITH_ONEDNN) add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libdnnl.so.0 COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_SHARED_LIB} ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS mkldnn) + DEPENDS onednn) list(APPEND OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/libdnnl.so.0) list(APPEND EAGER_OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/libdnnl.so.0) endif() diff --git a/paddle/fluid/pybind/dist_static_op_function.h b/paddle/fluid/pybind/dist_static_op_function.h index afd71b7521567..c23a16bca2730 100644 --- a/paddle/fluid/pybind/dist_static_op_function.h +++ b/paddle/fluid/pybind/dist_static_op_function.h @@ -18,6 +18,7 @@ #include "paddle/fluid/pir/dialect/operator/ir/api_builder.h" #include "paddle/fluid/pybind/eager_utils.h" #include "paddle/fluid/pybind/exception.h" +#include 
"paddle/fluid/pybind/pir.h" #include "paddle/phi/core/enforce.h" namespace paddle { @@ -66,12 +67,42 @@ static PyObject *static_api_reshard(PyObject *self, PyObject *process_mesh_obj = PyTuple_GET_ITEM(args, 1); auto process_mesh = CastPyArg2ProcessMesh(process_mesh_obj, 1); - PyObject *dims_mapping_obj = PyTuple_GET_ITEM(args, 2); - auto dims_mapping = CastPyArg2VectorOfInt64(dims_mapping_obj, 2); + PyObject *placements_obj = PyTuple_GET_ITEM(args, 2); + auto placements = CastPyArg2VectorOfPlacement(placements_obj, 2); + + int64_t ndim = GetValueDims(input).size(); + std::vector dim_map(ndim, -1); + for (size_t i = 0; i < placements.size(); i++) { + auto &placement = placements[i]; + if (placement->is_shard()) { + auto shard_dim = + dynamic_cast(*placement).get_dim(); + PADDLE_ENFORCE_EQ( + dim_map[shard_dim], + -1, + common::errors::InvalidArgument( + "Tensor dim %lld is already sharded on mesh dim %lld," + " DistTensor operator implementation does not support things " + "like hybrid" + " sharding strategies yet (i.e. [Shard(0), Shard(0)])", + shard_dim, + dim_map[shard_dim])); + dim_map[shard_dim] = i; + } + } + paddle::flat_hash_map partial_status; + for (size_t i = 0; i < placements.size(); ++i) { + auto &p = placements[i]; + if (p->is_partial()) { + partial_status.insert( + {i, + dynamic_cast(*p).get_reduce_type()}); + } + } // Call ir static api auto static_api_out = - paddle::dialect::reshard(input, process_mesh, dims_mapping); + paddle::dialect::reshard(input, process_mesh, dim_map, partial_status); return ToPyObject(static_api_out); } catch (...) { diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index aba7c99662bbe..48f0168196949 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -32,7 +32,6 @@ limitations under the License. 
*/ #include "paddle/fluid/jit/function.h" #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/operators/py_func_op.h" -#include "paddle/fluid/operators/utils.h" #include "paddle/fluid/pir/dialect/operator/ir/op_type.h" #include "paddle/fluid/pir/dialect/operator/utils/utils.h" #include "paddle/fluid/platform/enforce.h" @@ -47,6 +46,7 @@ limitations under the License. */ #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/distributed/auto_parallel/placement_types.h" #include "paddle/phi/core/distributed/auto_parallel/process_mesh.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/pir/include/core/attribute.h" COMMON_DECLARE_bool(check_nan_inf); @@ -2478,6 +2478,7 @@ PyObjectHolder::PyObjectHolder(PyObject* ptr) { ptr_ = ptr; } PyObjectHolder::~PyObjectHolder() { // NOLINT ::pybind11::gil_scoped_acquire gil; + // NOTE(deepllz): ptr_ is owned by this object, so release it in destructor. Py_XDECREF(ptr_); } @@ -2512,7 +2513,10 @@ std::shared_ptr PackHook::operator()( bool grad_tmp = egr::Controller::Instance().HasGrad(); egr::Controller::Instance().SetHasGrad(false); ::pybind11::gil_scoped_acquire gil; - auto args = PyTuple_New(1); + PyObject* args = PyTuple_New(1); + PADDLE_ENFORCE_NOT_NULL(args, + paddle::platform::errors::External( + pybind11::detail::error_string().c_str())); PyTuple_SET_ITEM(args, 0, paddle::pybind::ToPyObject(tensor)); PyObject* ret = PyObject_Call(hook_, args, nullptr); PADDLE_ENFORCE_NOT_NULL(ret, @@ -2527,7 +2531,10 @@ void* PackHook::operator()(void* py_tensor) { bool grad_tmp = egr::Controller::Instance().HasGrad(); egr::Controller::Instance().SetHasGrad(false); ::pybind11::gil_scoped_acquire gil; - auto args = PyTuple_New(1); + PyObject* args = PyTuple_New(1); + PADDLE_ENFORCE_NOT_NULL(args, + paddle::platform::errors::External( + pybind11::detail::error_string().c_str())); Py_INCREF(reinterpret_cast(py_tensor)); PyTuple_SET_ITEM(args, 0, reinterpret_cast(py_tensor)); PyObject* ret = 
PyObject_Call(hook_, args, nullptr); @@ -2551,13 +2558,20 @@ paddle::Tensor UnPackHook::operator()( bool grad_tmp = egr::Controller::Instance().HasGrad(); egr::Controller::Instance().SetHasGrad(false); ::pybind11::gil_scoped_acquire gil; - auto args = PyTuple_New(1); - Py_INCREF(reinterpret_cast(packed_value->get())); - PyTuple_SET_ITEM(args, 0, reinterpret_cast(packed_value->get())); + PyObject* args = PyTuple_New(1); + PADDLE_ENFORCE_NOT_NULL(args, + paddle::platform::errors::External( + pybind11::detail::error_string().c_str())); + PyObject* py_packed_value = reinterpret_cast(packed_value->get()); + Py_INCREF(py_packed_value); + PyTuple_SET_ITEM(args, 0, py_packed_value); PyObject* ret = PyObject_Call(hook_, args, nullptr); PADDLE_ENFORCE_NOT_NULL(ret, paddle::platform::errors::External( pybind11::detail::error_string().c_str())); + // NOTE(deepllz): tupledealloc will cause the reference count of the objects + // in it to be decremented by one, so no need to call + // Py_XDECREF(py_packed_value) Py_XDECREF(args); egr::Controller::Instance().SetHasGrad(grad_tmp); @@ -2576,7 +2590,10 @@ void* UnPackHook::operator()(void* packed_value, void* other) { bool grad_tmp = egr::Controller::Instance().HasGrad(); egr::Controller::Instance().SetHasGrad(false); ::pybind11::gil_scoped_acquire gil; - auto args = PyTuple_New(1); + PyObject* args = PyTuple_New(1); + PADDLE_ENFORCE_NOT_NULL(args, + paddle::platform::errors::External( + pybind11::detail::error_string().c_str())); Py_INCREF(reinterpret_cast(packed_value)); PyTuple_SET_ITEM(args, 0, reinterpret_cast(packed_value)); PyObject* ret = PyObject_Call(hook_, args, nullptr); diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index b70efdbabbebc..55173bad9a1c8 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -54,7 +54,6 @@ limitations under the License. 
*/ #include "paddle/fluid/imperative/type_defs.h" #include "paddle/fluid/imperative/xccl_context.h" #include "paddle/fluid/memory/allocation/mmap_allocator.h" -#include "paddle/fluid/operators/utils.h" #include "paddle/fluid/pybind/cuda_streams_py.h" #include "paddle/fluid/pybind/eager_utils.h" #include "paddle/fluid/pybind/pybind_variant_caster.h" @@ -62,6 +61,7 @@ limitations under the License. */ #include "paddle/fluid/pybind/tensor_py.h" #include "paddle/fluid/pybind/uva_utils.h" #include "paddle/phi/core/compat/arg_map_context.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/core/type_defs.h" namespace paddle { diff --git a/paddle/fluid/pybind/io.cc b/paddle/fluid/pybind/io.cc index 9075e904ef4b8..d38dbf72643ce 100644 --- a/paddle/fluid/pybind/io.cc +++ b/paddle/fluid/pybind/io.cc @@ -17,6 +17,8 @@ limitations under the License. */ #include "paddle/fluid/framework/io/save_load_tensor.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/selected_rows_utils.h" +#include "paddle/fluid/pir/serialize_deserialize/include/interface.h" +#include "paddle/fluid/pir/serialize_deserialize/include/save_load_parameters.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/pybind/pybind_variant_caster.h" #include "paddle/utils/pybind.h" @@ -122,6 +124,24 @@ void BindIO(pybind11::module *m) { paddle::framework::LoadTensor(path, &tensor_load); return tensor_load; }); + + m->def("save_func", &pir::SaveFunction); + + m->def("save_combine_func", &pir::SaveCombineFunction); + + m->def("load_func", &pir::LoadFunction); + + m->def("load_combine_func", &pir::LoadCombineFunction); + + m->def("serialize_pir_program", + &pir::WriteModule, + py::arg("program"), + py::arg("file_path"), + py::arg("pir_version"), + py::arg("overwrite") = true, + py::arg("readable") = false, + py::arg("trainable") = true); + m->def("deserialize_pir_program", &pir::ReadModule); } } // namespace pybind } // namespace paddle diff --git 
a/paddle/fluid/pybind/manual_static_op_function.h b/paddle/fluid/pybind/manual_static_op_function.h index 7767c4a4569b3..8943633fb4cda 100644 --- a/paddle/fluid/pybind/manual_static_op_function.h +++ b/paddle/fluid/pybind/manual_static_op_function.h @@ -159,7 +159,7 @@ PyObject *static_api_full(PyObject *self, PyObject *args, PyObject *kwargs) { CallStackRecorder callstack_recoder("full_with_tensor"); callstack_recoder.Record(); auto static_api_out = - paddle::dialect::full_with_tensor(shape, value, dtype); + paddle::dialect::full_with_tensor(value, shape, dtype); callstack_recoder.AttachToOps(); return ToPyObject(static_api_out); diff --git a/paddle/fluid/pybind/pir.cc b/paddle/fluid/pybind/pir.cc index a6496602f695b..93dd4ba992068 100644 --- a/paddle/fluid/pybind/pir.cc +++ b/paddle/fluid/pybind/pir.cc @@ -300,6 +300,15 @@ void BindProgram(py::module *m) { [](std::shared_ptr self, int64_t random_seed) { SetProgramInt64Attr(self, "random_seed", random_seed); }) + .def_property_readonly( + "blocks", + [](const std::shared_ptr &self) { + // Note: We only return global block currently. + py::list op_list; + op_list.append(self->block()); + return op_list; + }, + return_value_policy::reference) .def("get_output_value_by_name", [](Program &self, const std::string &name) { return GetOutputValueByName(self, name); @@ -1274,6 +1283,10 @@ static auto GetNoNeedBufferValue(const ::pir::Block *whole_block, std::unordered_set<::pir::Value> no_need_buffer_values; range_block_do( whole_block, range, [&need_buffer_values](::pir::Operation *op) { + // NOTE(SigureMo): We should process the CombineOp in it's users. + if (op->isa()) { + return; + } if (op->HasInterface() == false) { // not a OpYamlInfoInterface, can't have no_need_buffer. 
for (const auto &operand : op->operands_source()) { @@ -1284,8 +1297,16 @@ static auto GetNoNeedBufferValue(const ::pir::Block *whole_block, op->dyn_cast().GetOpInfo(); int counter = 0; for (const auto &op_input_info : std::get<0>(opinfo)) { + auto value = op->operand_source(counter); if (!op_input_info.no_need_buffer) { - need_buffer_values.insert(op->operand_source(counter)); + need_buffer_values.insert(value); + if (!IsFakeValue(value) && value.defining_op() && + value.defining_op()->isa()) { + for (const auto &combine_value : + value.defining_op()->operands_source()) { + need_buffer_values.insert(combine_value); + } + } } counter += 1; } diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 5470f4d7ec4f2..35d1a297720b4 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -190,6 +190,7 @@ limitations under the License. */ #endif #ifdef PADDLE_WITH_CINN +#include "paddle/cinn/pybind/bind.h" #include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h" #include "paddle/fluid/pybind/test.h" #endif @@ -405,6 +406,10 @@ bool SupportsInt8() { #endif } +bool SupportsAvx512F() { + return phi::backends::cpu::MayIUse(phi::backends::cpu::cpu_isa_t::avx512f); +} + bool SupportsVNNI() { #ifndef PADDLE_WITH_DNNL return false; @@ -2153,6 +2158,7 @@ All parameter, weight, gradient are variables in Paddle. m.def("supports_bfloat16", SupportsBfloat16); m.def("supports_bfloat16_fast_performance", SupportsBfloat16FastPerformance); m.def("supports_int8", SupportsInt8); + m.def("supports_avx512f", SupportsAvx512F); m.def("supports_vnni", SupportsVNNI); m.def("op_supported_infos", imperative::OpSupportedInfos); m.def("is_compiled_with_brpc", IsCompiledWithBrpc); @@ -3053,6 +3059,7 @@ All parameter, weight, gradient are variables in Paddle. 
#if defined(PADDLE_WITH_CINN) BindTest(&m); + cinn::pybind::BindCINN(&m); #endif BindPir(&m); diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index ba3a466fba219..c93588f73d6f3 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -31,11 +31,11 @@ limitations under the License. */ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/memory/memcpy.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" #include "paddle/fluid/operators/math/concat_and_split.h" #include "paddle/fluid/platform/bfloat16.h" #include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/fluid/pybind/complex.h" +#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/funcs/strided_memcpy.h" #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #include "paddle/fluid/platform/cuda_device_guard.h" @@ -696,7 +696,7 @@ void _sliceCompute(const phi::DenseTensor *in, auto out_t = framework::EigenTensor::From( *out); - operators::EigenSlice, T, D>::Eval( + phi::funcs::EigenSlice, T, D>::Eval( eigen_place, out_t, in_t, offsets, extents); } diff --git a/paddle/fluid/pybind/uva_utils.h b/paddle/fluid/pybind/uva_utils.h index 7f29814bcecb5..4d46a2398056d 100644 --- a/paddle/fluid/pybind/uva_utils.h +++ b/paddle/fluid/pybind/uva_utils.h @@ -20,10 +20,10 @@ #undef copysign #endif -#include "paddle/fluid/operators/utils.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/compat/convert_utils.h" #include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/tensor_utils.h" namespace paddle { namespace pybind { diff --git a/paddle/phi/CMakeLists.txt b/paddle/phi/CMakeLists.txt index 7325aef2202b5..93144847c3333 100644 --- a/paddle/phi/CMakeLists.txt +++ b/paddle/phi/CMakeLists.txt @@ -62,8 +62,8 @@ if(WITH_XBYAK) list(APPEND PHI_DEPS xbyak) endif() -if(WITH_MKLDNN) - list(APPEND PHI_DEPS mkldnn) 
+if(WITH_ONEDNN) + list(APPEND PHI_DEPS onednn) endif() if(WITH_GLOO) @@ -119,8 +119,10 @@ if(WITH_AVX AND AVX512F_FLAG AND WITH_MKL) set_source_files_properties( + kernels/fusion/cpu/fused_layer_norm_avx_kernel.cc kernels/fusion/cpu/self_dp_attention_kernel.cc - PROPERTIES COMPILE_FLAGS "-Wno-maybe-uninitialized -mfma ${AVX512F_FLAG}") + PROPERTIES COMPILE_FLAGS + "${Wno_Maybe_Uninitialized} ${FMA_FLAG} ${AVX512F_FLAG}") endif() if(WITH_GPU) diff --git a/paddle/phi/api/lib/backend_set.h b/paddle/phi/api/lib/backend_set.h index af4de2580f578..13077fb9167ad 100644 --- a/paddle/phi/api/lib/backend_set.h +++ b/paddle/phi/api/lib/backend_set.h @@ -26,7 +26,7 @@ namespace experimental { * and the higher backend bit has a higher priority. * * A Tensor may belong to multiple backends at the same time, such CPU and - * MKLDNN. Only one backend value cannot + * OneDNN. Only one backend value cannot */ class BackendSet final { public: diff --git a/paddle/phi/api/profiler/device_tracer.cc b/paddle/phi/api/profiler/device_tracer.cc index e1c009fa9cad0..085d28220a6a9 100644 --- a/paddle/phi/api/profiler/device_tracer.cc +++ b/paddle/phi/api/profiler/device_tracer.cc @@ -834,7 +834,7 @@ uint32_t GetCurSystemThreadId() { return id; } -void RecoreCurThreadId(uint64_t id) { +void RecordCurThreadId(uint64_t id) { std::lock_guard lock(system_thread_id_map_mutex); auto gid = GetCurSystemThreadId(); system_thread_id_map[gid] = id; diff --git a/paddle/phi/api/profiler/device_tracer.h b/paddle/phi/api/profiler/device_tracer.h index bde73357f2075..a0f4b5c54670e 100644 --- a/paddle/phi/api/profiler/device_tracer.h +++ b/paddle/phi/api/profiler/device_tracer.h @@ -162,5 +162,5 @@ void ClearCurBlock(); int BlockDepth(); // Set current thread id, so we can map the system thread id to thread id. 
-void RecoreCurThreadId(uint64_t id); +void RecordCurThreadId(uint64_t id); } // namespace phi diff --git a/paddle/phi/api/profiler/profiler_helper.h b/paddle/phi/api/profiler/profiler_helper.h index 31ccbbb12fb6f..16ae735fccc1e 100644 --- a/paddle/phi/api/profiler/profiler_helper.h +++ b/paddle/phi/api/profiler/profiler_helper.h @@ -73,7 +73,7 @@ inline EventList &GetEventList() { ProfilerHelper::g_thread_id = ProfilerHelper::g_next_thread_id++; ProfilerHelper::g_all_event_lists.emplace_front( ProfilerHelper::g_event_list); - RecoreCurThreadId(ProfilerHelper::g_thread_id); + RecordCurThreadId(ProfilerHelper::g_thread_id); } return *ProfilerHelper::g_event_list; } diff --git a/paddle/phi/api/yaml/backward.yaml b/paddle/phi/api/yaml/backward.yaml index 603b65c8b4c53..3937464fbce49 100644 --- a/paddle/phi/api/yaml/backward.yaml +++ b/paddle/phi/api/yaml/backward.yaml @@ -13,6 +13,7 @@ kernel : func : abs_double_grad data_type : grad_x_grad + backward : abs_triple_grad - backward_op : abs_grad forward : abs (Tensor x) -> Tensor(out) @@ -27,6 +28,17 @@ composite : abs_grad(x, out_grad, x_grad) backward : abs_double_grad +- backward_op : abs_triple_grad + forward : abs_double_grad (Tensor x, Tensor grad_x_grad) -> Tensor(grad_out_grad) + args : (Tensor x, Tensor grad_out_grad_grad) + output : Tensor(grad_x_grad_grad) + infer_meta : + func : UnchangedInferMeta + param : [x] + data_transform : + support_trans_dtype : x + composite : abs_triple_grad(x, grad_out_grad_grad, grad_x_grad_grad) + - backward_op : acos_grad forward : acos (Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) @@ -717,6 +729,16 @@ kernel : func : erfinv_grad +- backward_op : exp_double_grad + forward : exp_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x) + args : (Tensor out, Tensor grad_out, Tensor grad_x_grad) + output : Tensor(out_grad), Tensor(grad_out_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param : [out, out] + composite : exp_double_grad(out, grad_out, 
grad_x_grad, out_grad, grad_out_grad) + inplace : (grad_x_grad -> grad_out_grad) + - backward_op : exp_grad forward : exp (Tensor x) -> Tensor(out) args : (Tensor out, Tensor out_grad) @@ -728,6 +750,7 @@ kernel : func : exp_grad inplace : (out_grad -> x_grad) + backward : exp_double_grad composite : exp_grad(out, out_grad, x_grad) - backward_op : expand_as_grad @@ -1434,6 +1457,7 @@ param : [x, x] kernel : func : log_double_grad + composite : log_double_grad(x, grad_out, grad_x_grad, x_grad, grad_out_grad) inplace : (grad_x_grad -> grad_out_grad) - backward_op : log_grad @@ -2010,6 +2034,7 @@ spmd_rule : ElementwiseUnaryGradInferSpmd kernel : func : rsqrt_grad + composite : rsqrt_grad(out, out_grad, x_grad) backward : rsqrt_double_grad inplace : (out_grad -> x_grad) @@ -2734,6 +2759,9 @@ forward: silu_grad (Tensor x, Tensor out, Tensor grad_out) -> Tensor(grad_x) args: (Tensor x, Tensor out, Tensor grad_out, Tensor grad_x_grad) output: Tensor(x_grad), Tensor(grad_out_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param : [x, x] composite: silu_double_grad(x, out, grad_out, grad_x_grad, x_grad, grad_out_grad) - backward_op: unpool3d_grad diff --git a/paddle/phi/api/yaml/fused_ops.yaml b/paddle/phi/api/yaml/fused_ops.yaml index 2df5f03476485..032bc437fc5fe 100644 --- a/paddle/phi/api/yaml/fused_ops.yaml +++ b/paddle/phi/api/yaml/fused_ops.yaml @@ -1,4 +1,4 @@ -# This file is designed for fusion C++ farward operators, which manages the +# This file is designed for fusion C++ forward operators, which manages the # generated code for static mode and dynamic mode (when `support_dygraph_mode` is true). 
# "support_dygraph_mode" is an extra configuration item in this file, # if one operator have "support_dygraph_mode : true", it supports dygraph mode, @@ -83,6 +83,15 @@ data_type : x optional : bias, branch, branch_max ,x_max, scale_max, out_max_in +- op : cross_attention_xpu + args : (Tensor input_q, Tensor input_kv, Tensor[] fc_weight, Tensor[] fc_weight_max, Tensor[] fc_bias, Tensor mask, int head_num, int head_dim, float alpha, DataType out_dtype) + output : Tensor(qkv), Tensor(qkv_max) + infer_meta : + func : CrossAttentionXPUInferMeta + kernel : + func : cross_attention_xpu + data_type : input_q + - op : dequantize_xpu args : (Tensor x, DataType out_dtype, float scale = 1.0f) output : Tensor(y) @@ -186,6 +195,7 @@ func : fused_conv2d_add_act data_type : input optional : bias, residual_data, outputs + interfaces : paddle::dialect::LayoutTransformationInterface - op : fused_dconv_drelu_dbn args : (Tensor grad_output, Tensor weight, Tensor grad_output_add, Tensor residual_input, Tensor bn1_eqscale, Tensor bn1_eqbias, Tensor conv_input, Tensor bn1_mean, Tensor bn1_inv_std, Tensor bn1_gamma, Tensor bn1_beta, Tensor bn1_input, Tensor bn2_mean, Tensor bn2_inv_std, Tensor bn2_gamma, Tensor bn2_beta, Tensor bn2_input, int[] paddings, int[] dilations, int[] strides, str padding_algorithm, int groups, str data_format, bool fuse_shortcut, bool fuse_dual, bool fuse_add, bool exhaustive_search) @@ -375,6 +385,15 @@ func : generate_sequence_xpu data_type : dtype +- op : group_norm_silu_xpu + args : (Tensor x, Tensor scale, Tensor bias, int groups, float epsilon) + output : Tensor(out) + infer_meta : + func : GroupNormalizeSiluXPUInferMeta + kernel : + func : group_norm_silu_xpu + data_type : x + - op : layer_norm_act_xpu args : (Tensor x, Tensor scale, Tensor bias, int begin_norm_axis, float epsilon, int act_type, float act_param) output : Tensor(out) @@ -420,14 +439,14 @@ optional : bias_qk - op : qkv_attention_xpu - args : (Tensor q, Tensor k, Tensor v, Tensor q_max, 
Tensor k_max, Tensor v_max, float alpha, int head_num, int head_dim, bool qkv_fc_fusion, DataType out_dtype) - output : Tensor(qkv), Tensor(qkv_max) + args : (Tensor q, Tensor k, Tensor v, Tensor q_max, Tensor k_max, Tensor v_max, Tensor qk_max, Tensor qkv_max, float alpha, int head_num, int head_dim, bool qkv_fc_fusion, DataType out_dtype) + output : Tensor(qkv) infer_meta : func : QKVAttentionXPUInferMeta kernel : func : qkv_attention_xpu data_type : q - optional : q_max, k_max, v_max + optional : q_max, k_max, v_max, qk_max, qkv_max - op : quantize_xpu args : (Tensor x, DataType out_dtype, float scale = 1.0f) @@ -474,6 +493,15 @@ func : skip_layernorm data_type : x +- op : spatial_transformer_resblock_xpu + args : (Tensor x, Tensor[] x_max, Tensor[] conv_bias, Tensor[] conv_filter, Tensor[] conv_filter_max, Tensor[] gn_bias, Tensor[] gn_scale, int[] dilations, int[] paddings, int[] strides, float[] gn_eps, int[] gn_groups, int[] groups, bool conv_fix, bool has_silu_fc_input, bool include_silu) + output : Tensor(out), Tensor(out_max) + infer_meta : + func : SpatialTransformerResblockXPUInferMeta + kernel : + func : spatial_transformer_resblock_xpu + data_type : x + - op : squeeze_excitation_block args : (Tensor x, Tensor filter, Tensor filter_max, Tensor bias, Tensor branch, int[] act_type, float[] act_param, int[] filter_dims) output : Tensor(out) diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index 8478e3caec98c..b24b3a20c37eb 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -411,6 +411,7 @@ param: [x] kernel : func : min_grad + composite : min_grad(x, out, out_grad, axis, keepdim, reduce_all, x_grad) - backward_op : minimum_grad forward : minimum(Tensor x, Tensor y) -> Tensor(out) diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index 142814e1cc01e..188367817803a 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ 
b/paddle/phi/api/yaml/legacy_ops.yaml @@ -77,7 +77,6 @@ backend : place data_transform : support_trans_dtype : start, end, step - interfaces : paddle::dialect::InferSymbolicShapeInterface - op : assign args : (Tensor x) @@ -551,7 +550,7 @@ skip_transform : x - op : full_with_tensor - args : (Tensor shape, Tensor value, DataType dtype=DataType::FLOAT32) + args : (Tensor value, IntArray shape, DataType dtype=DataType::FLOAT32) output: Tensor(out) infer_meta : func : FullWithTensorInferMeta @@ -1099,7 +1098,6 @@ kernel : func : split backward : split_grad - interfaces : paddle::dialect::InferSymbolicShapeInterface - op : split_with_num args : (Tensor x, int num, Scalar(int) axis) diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml index 0dbc54962da98..56dad40de1353 100755 --- a/paddle/phi/api/yaml/op_compat.yaml +++ b/paddle/phi/api/yaml/op_compat.yaml @@ -15,7 +15,7 @@ # attrs : [bool is_test = false] - op : abs - backward : abs_grad + backward : abs_grad, abs_double_grad, abs_triple_grad inputs : x : X outputs : @@ -296,6 +296,12 @@ get_expected_kernel_type : assign : GetAssignExpectedKernelType +- op : assign_pos + inputs : + {x : X} + outputs : + out : Out + - op : assign_value outputs : out : Out @@ -818,6 +824,12 @@ outputs : out : Out +- op : dgc_momentum + inputs : + {param : Param, grad : Grad, velocity : Velocity, learning_rate : LearningRate, master_param : MasterParam, current_step_tensor : current_step, nranks_tensor : nranks} + outputs : + {param_out : ParamOut, velocity_out : VelocityOut, master_param_out : MasterParamOut, grad_out : Grad_out} + - op : diag (diag_v2) backward : diag_grad (diag_v2_grad) inputs : @@ -1019,7 +1031,7 @@ out : Out - op : exp - backward : exp_grad + backward : exp_grad, exp_double_grad inputs : x : X outputs : @@ -1267,6 +1279,12 @@ data_type : float support_tensor : true +- op : full_with_tensor + int_array: + shape : + data_type : int64_t + support_tensor : true + - op : 
fused_adam_(fused_adam) inputs : {params : Params, grads : Grads, learning_rate : LearningRate, moments1 : Moments1, @@ -3705,6 +3723,12 @@ outputs: {param_out : ParamOut, moment_out : MomentOut} +- op: dgc + inputs: + {u: U, v: V, grad: Grad} + outputs: + {u_out: U_out, v_out: V_out, encode_grad: EncodeGrad, grad_out: Grad_out, gather_buff: GatherBuff} + - op: distribute_fpn_proposals inputs : {fpn_rois: FpnRois, rois_num: RoisNum} @@ -3713,6 +3737,12 @@ multi_level_rois_num: MultiLevelRoIsNum restore_index: RestoreIndex +- op: distributed_fused_lamb + inputs: + {param: Param, grad: Grad, fp32_fused_param: FP32FusedParam, fp32_fused_grad: FP32FusedGrad, fp16_fused_param: FP16FusedParam, fp16_fused_grad: FP16FusedGrad, moment1: Moment1, moment2: Moment2, beta1pow: Beta1Pow, beta2pow: Beta2Pow, fused_param_offsets: FusedParamOffsets, fp32_shard_fused_param_offsets: FP32ShardFusedParamOffsets, fp16_shard_fused_param_offsets: FP16ShardFusedParamOffsets, param_info: ParamInfo, param_order: ParamOrder, learning_rate: LearningRate, global_scale: GlobalScale} + outputs: + {param_out : ParamOut, fp32_fused_param_out: FP32FusedParamOut, fp16_fused_param_out: FP16FusedParamOut, fp32_acc_fused_grad: FP32AccFusedGrad, fp16_acc_fused_grad: FP16AccFusedGrad, moment1_out: Moment1Out, moment2_out: Moment2Out, beta1pow_out: Beta1PowOut, beta2pow_out: Beta2PowOut, found_inf: FoundInf, acc_step: AccStep, stop_update: StopUpdate, step: Step} + - op: distributed_fused_lamb_init inputs: {param: Param, grad: Grad} diff --git a/paddle/phi/api/yaml/op_version.yaml b/paddle/phi/api/yaml/op_version.yaml index 2bd09abd311ae..6e7a2cff79764 100644 --- a/paddle/phi/api/yaml/op_version.yaml +++ b/paddle/phi/api/yaml/op_version.yaml @@ -274,7 +274,7 @@ - op : generate_proposals version : - - checkpoint : Registe generate_proposals_v2 for adding the attribute of pixel_offset + - checkpoint : Register generate_proposals_v2 for adding the attribute of pixel_offset action : - add_attr : pixel_offset 
comment : If true, im_shape pixel offset is 1. diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml index 918cbb980d00f..0d6fbfc83691a 100755 --- a/paddle/phi/api/yaml/ops.yaml +++ b/paddle/phi/api/yaml/ops.yaml @@ -1,7 +1,7 @@ # This file is designed for C++ operators, which manages the # generated code for dynamic mode and static mode. If you want # to add the new operator configuration, make sure an operator's -# Python API, dynamic graph API, and static graph Opertaor parameters +# Python API, dynamic graph API, and static graph Operator parameters # are consistent and correspond one-to-one. It's forbidden that the # operator configured in this yaml file does not have Python API. @@ -2559,7 +2559,7 @@ kernel : func : shape {dense -> dense}, shape_sr {selected_rows -> dense} - data_transform: + data_transform : skip_transform : input interfaces : paddle::dialect::InferSymbolicShapeInterface @@ -2620,7 +2620,7 @@ spmd_rule : ElementwiseUnaryInferSpmd kernel : func : sin - inplace: (x -> out) + inplace : (x -> out) backward : sin_grad interfaces : paddle::dialect::InferSymbolicShapeInterface @@ -2781,10 +2781,10 @@ - op : swiglu args : (Tensor x, Tensor y) output : Tensor(out) - infer_meta: - func: SwiGLUInferMeta - spmd_rule: SwiGLUInferSpmd - kernel: + infer_meta : + func : SwiGLUInferMeta + spmd_rule : SwiGLUInferSpmd + kernel : func : swiglu optional : y backward: swiglu_grad @@ -2808,7 +2808,7 @@ func : UnchangedInferMeta kernel : func : tan - inplace: (x -> out) + inplace : (x -> out) backward : tan_grad interfaces : paddle::dialect::InferSymbolicShapeInterface @@ -3057,9 +3057,9 @@ func : WarpctcInferMeta kernel : func : warpctc - data_type: logits - optional: logits_length, labels_length - intermediate: warpctcgrad + data_type : logits + optional : logits_length, labels_length + intermediate : warpctcgrad backward : warpctc_grad - op : warprnnt @@ -3069,8 +3069,8 @@ func : WarprnntInferMeta kernel : func : warprnnt - data_type: input - 
intermediate: warprnntgrad + data_type : input + intermediate : warprnntgrad backward : warprnnt_grad - op : weight_dequantize @@ -3090,8 +3090,8 @@ kernel : func : weight_only_linear data_type : x - optional: bias - backward: weight_only_linear_grad + optional : bias + backward : weight_only_linear_grad - op : weight_quantize args : (Tensor x, str algo = "weight_only_int8", int arch = 80, int group_size = -1) @@ -3100,7 +3100,8 @@ func : WeightQuantizeInferMeta kernel : func : weight_quantize - data_type: x + data_type : x + backend : x - op : weighted_sample_neighbors args : (Tensor row, Tensor colptr, Tensor edge_weight, Tensor input_nodes, Tensor eids, int sample_size, bool return_eids) @@ -3119,7 +3120,7 @@ spmd_rule: WhereInferSpmd kernel : func : where - inplace: (x -> out) + inplace : (x -> out) backward : where_grad interfaces : paddle::dialect::InferSymbolicShapeInterface diff --git a/paddle/phi/api/yaml/static_backward.yaml b/paddle/phi/api/yaml/static_backward.yaml index 526a7195a5bb3..d4ca3f05e7c0b 100755 --- a/paddle/phi/api/yaml/static_backward.yaml +++ b/paddle/phi/api/yaml/static_backward.yaml @@ -103,7 +103,7 @@ output : Tensor(weight_grad) infer_meta : func : EmbeddingGradInferMeta - param : [x,weght] + param : [x,weight] kernel : func : embedding_grad {dense, dense, dense -> dense} embedding_sparse_grad {dense, dense, dense -> selected_rows} diff --git a/paddle/phi/backends/CMakeLists.txt b/paddle/phi/backends/CMakeLists.txt index 80d5f14e627a3..67690440f6bbb 100644 --- a/paddle/phi/backends/CMakeLists.txt +++ b/paddle/phi/backends/CMakeLists.txt @@ -31,7 +31,7 @@ if(WITH_XPU) list(APPEND BACKENDS_DEPS phi_dynload_xpti) endif() -if(WITH_MKLDNN) +if(WITH_ONEDNN) list(APPEND BACKENDS_SRCS onednn/onednn_context.cc) list(APPEND BACKENDS_SRCS onednn/axpy_handler.cc) list(APPEND BACKENDS_SRCS onednn/matmul_utils.cc) diff --git a/paddle/phi/backends/dynload/dynamic_loader.cc b/paddle/phi/backends/dynload/dynamic_loader.cc index 
0b056d6df972f..3e65845905646 100644 --- a/paddle/phi/backends/dynload/dynamic_loader.cc +++ b/paddle/phi/backends/dynload/dynamic_loader.cc @@ -103,13 +103,14 @@ static constexpr char* win_nvjpeg_lib = ".dll;nvjpeg64_" CUDA_VERSION_MAJOR ".dll;nvjpeg64_10.dll"; static constexpr char* win_cusolver_lib = "cusolver64_" CUDA_VERSION_MAJOR CUDA_VERSION_MINOR - ".dll;cusolver64_" CUDA_VERSION_MAJOR ".dll;cusolver64_10.dll"; + ".dll;cusolver64_" CUDA_VERSION_MAJOR + ".dll;cusolver64_11.dll;cusolver64_10.dll"; static constexpr char* win_cusparse_lib = "cusparse64_" CUDA_VERSION_MAJOR CUDA_VERSION_MINOR ".dll;cusparse64_" CUDA_VERSION_MAJOR ".dll;cusparse64_10.dll"; static constexpr char* win_cufft_lib = "cufft64_" CUDA_VERSION_MAJOR CUDA_VERSION_MINOR - ".dll;cufft64_" CUDA_VERSION_MAJOR ".dll;cufft64_10.dll"; + ".dll;cufft64_" CUDA_VERSION_MAJOR ".dll;cufft64_11.dll;cufft64_10.dll"; #else static constexpr char* win_curand_lib = "curand64_" CUDA_VERSION_MAJOR CUDA_VERSION_MINOR diff --git a/paddle/phi/backends/onednn/onednn_context.cc b/paddle/phi/backends/onednn/onednn_context.cc index b7789f29740f0..1a27e83af50fb 100644 --- a/paddle/phi/backends/onednn/onednn_context.cc +++ b/paddle/phi/backends/onednn/onednn_context.cc @@ -189,7 +189,7 @@ struct OneDNNContext::Impl { std::lock_guard lock(*p_mutex_); - // Find ShapeBlob for current mkldnn session id. + // Find ShapeBlob for current onednn session id. 
auto map_it = pMap->find(sid); if (map_it == pMap->end()) { @@ -259,7 +259,7 @@ struct OneDNNContext::Impl { std::lock_guard lock(*p_mutex_); - // Find ShapeBlob for current mkldnn session id firstly + // Find ShapeBlob for current onednn session id firstly auto map_it = pMap->find(sid); // (jczaja): After first iteration of model's execution we // should have all elements cached (mostly) so failures are unlikely (less @@ -366,7 +366,7 @@ struct OneDNNContext::Impl { unsigned int block_next_cache_clearing_ = 0; // Holds some attributes only used by the onednn kernel calculation - // Since original mkldnn op kernel directly adds the operations that require + // Since original onednn op kernel directly adds the operations that require // fusion to the native kernel operations, and uses the attribute `fuse_xxx` // to control, for onednn, there will be some attributes that seem to be // independent of the device are also saved here. diff --git a/paddle/phi/backends/onednn/onednn_context.h b/paddle/phi/backends/onednn/onednn_context.h index 499be34650098..0e4654cb50a77 100644 --- a/paddle/phi/backends/onednn/onednn_context.h +++ b/paddle/phi/backends/onednn/onednn_context.h @@ -28,7 +28,7 @@ namespace phi { using TensorNameMap = std::map>; class OneDNNContextThreadLocals { - // default mkldnn session id + // default onednn session id typedef OneDNNContextThreadLocals self; struct Body { @@ -38,7 +38,7 @@ class OneDNNContextThreadLocals { // - For fixed-shape, it's a null string in default. // - For dynamic-shape, it's user specific. std::string cur_input_shape_str; - // the cache capacity of different input shapes for MKLDNN. + // the cache capacity of different input shapes for OneDNN. // Default 1 means fixed input shape, not dynamic shape. int cur_input_shape_cache_capacity; // Recently registered data_format. 
This is needed to @@ -73,9 +73,9 @@ class OneDNNContextThreadLocals { OneDNNContextThreadLocals(const OneDNNContextThreadLocals& c) = delete; public: - // default mkldnn session id + // default onednn session id static constexpr size_t kMKLDNNSessionID_Default = 0; - // mkldnn session id for cache clearing mode + // onednn session id for cache clearing mode static constexpr size_t kMKLDNNSessionID_CacheClearing = -1; TEST_API static Body& fetch(); }; @@ -89,7 +89,7 @@ class OneDNNContext : public CPUContext { template using umap_key_string_t = umap_value_smart_t; - // Following three maps are used to cache MKLDNN primitives. + // Following three maps are used to cache OneDNN primitives. // There relations are: // - BlobMap = Map // - ShapeBlob = Map diff --git a/paddle/phi/backends/xpu/xpu1_op_list.cc b/paddle/phi/backends/xpu/xpu1_op_list.cc index 29484d43867c2..58e5c5d72beab 100644 --- a/paddle/phi/backends/xpu/xpu1_op_list.cc +++ b/paddle/phi/backends/xpu/xpu1_op_list.cc @@ -154,6 +154,7 @@ XPUOpMap& get_kl1_ops() { XPUKernelSet({phi::DataType::INT64, phi::DataType::INT32, phi::DataType::FLOAT32})}, + {"group_norm_silu_xpu", XPUKernelSet({phi::DataType::FLOAT32})}, {"hard_switch_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"hard_switch", XPUKernelSet({phi::DataType::FLOAT32})}, {"index_select", diff --git a/paddle/phi/backends/xpu/xpu2_op_list.cc b/paddle/phi/backends/xpu/xpu2_op_list.cc index 182d33319906b..1a083c30fcef9 100644 --- a/paddle/phi/backends/xpu/xpu2_op_list.cc +++ b/paddle/phi/backends/xpu/xpu2_op_list.cc @@ -549,6 +549,8 @@ XPUOpMap& get_kl2_ops() { phi::DataType::FLOAT32})}, {"grid_sampler_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"grid_sampler", XPUKernelSet({phi::DataType::FLOAT32})}, + {"group_norm_silu_xpu", + XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"hard_sigmoid_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"hard_sigmoid", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, @@ -913,6 
+915,8 @@ XPUOpMap& get_kl2_ops() { phi::DataType::INT16, phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, + {"spatial_transformer_resblock_xpu", + XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"split", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16, @@ -1209,7 +1213,9 @@ XPUOpMap& get_kl2_ops() { {"fused_feedforward_grad", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"qkv_attention_xpu", - XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, + XPUKernelSet({phi::DataType::FLOAT32, + phi::DataType::FLOAT16, + phi::DataType::INT8})}, {"lod_reset", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16, @@ -1224,6 +1230,8 @@ XPUOpMap& get_kl2_ops() { XPUKernelSet({phi::DataType::FLOAT16, phi::DataType::FLOAT32})}, {"roformer_relative_embedding_xpu", XPUKernelSet({phi::DataType::FLOAT16, phi::DataType::FLOAT32})}, + {"cross_attention_xpu", + XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"variable_length_memory_efficient_attention", XPUKernelSet({phi::DataType::FLOAT16, phi::DataType::FLOAT32})}, {"flash_attn_unpadded", diff --git a/paddle/phi/backends/xpu/xpu3_op_list.cc b/paddle/phi/backends/xpu/xpu3_op_list.cc index 48dc5d8334193..80f95f0721ed8 100644 --- a/paddle/phi/backends/xpu/xpu3_op_list.cc +++ b/paddle/phi/backends/xpu/xpu3_op_list.cc @@ -523,6 +523,8 @@ XPUOpMap& get_kl3_ops() { phi::DataType::FLOAT16, phi::DataType::FLOAT32})}, {"grid_sampler_grad", XPUKernelSet({phi::DataType::FLOAT32})}, + {"group_norm_silu_xpu", + XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"hard_sigmoid_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"hard_sigmoid", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, diff --git a/paddle/phi/common/data_type.h b/paddle/phi/common/data_type.h index f28dd7e1c6ef1..e7a07b7a3e525 100644 --- a/paddle/phi/common/data_type.h +++ b/paddle/phi/common/data_type.h @@ -253,6 +253,46 @@ inline std::string 
DataTypeToString(const DataType& dtype) { } } +inline DataType StringToDataType(const std::string& dtype) { + if (dtype == "Undefined(ALL_DTYPE)") { + return DataType::UNDEFINED; + } else if (dtype == "bool") { + return DataType::BOOL; + } else if (dtype == "int8") { + return DataType::INT8; + } else if (dtype == "uint8") { + return DataType::UINT8; + } else if (dtype == "int16") { + return DataType::INT16; + } else if (dtype == "uint16") { + return DataType::UINT16; + } else if (dtype == "int32") { + return DataType::INT32; + } else if (dtype == "uint32") { + return DataType::UINT32; + } else if (dtype == "int64") { + return DataType::INT64; + } else if (dtype == "uint64") { + return DataType::UINT64; + } else if (dtype == "bfloat16") { + return DataType::BFLOAT16; + } else if (dtype == "float16") { + return DataType::FLOAT16; + } else if (dtype == "float32") { + return DataType::FLOAT32; + } else if (dtype == "float64") { + return DataType::FLOAT64; + } else if (dtype == "complex64") { + return DataType::COMPLEX64; + } else if (dtype == "complex128") { + return DataType::COMPLEX128; + } else if (dtype == "pstring") { + return DataType::PSTRING; + } else { + PD_THROW("Invalid enum data type `", dtype, "`."); + } +} + } // namespace phi namespace paddle { diff --git a/paddle/phi/config.h.in b/paddle/phi/config.h.in index cb3d7eadc7f04..38cac639437b7 100644 --- a/paddle/phi/config.h.in +++ b/paddle/phi/config.h.in @@ -12,8 +12,8 @@ #define ON 1 #define OFF 0 -// WITH_MKLDNN -#if @WITH_MKLDNN@ +// WITH_ONEDNN +#if @WITH_ONEDNN@ #undef PADDLE_WITH_DNNL #define PADDLE_WITH_DNNL #endif diff --git a/paddle/phi/core/dense_tensor.h b/paddle/phi/core/dense_tensor.h index b78cec1483272..c2d804199d2c7 100644 --- a/paddle/phi/core/dense_tensor.h +++ b/paddle/phi/core/dense_tensor.h @@ -203,7 +203,7 @@ class TEST_API DenseTensor : public TensorBase, * * 1. 
Some hardware or third-party libraries add some additional storage * properties on top of the description of the basic DenseTensor, such as - * memory desc of MKLDNN, storage_format and storage_layout of NPU, + * memory desc of OneDNN, storage_format and storage_layout of NPU, * these members are necessary for optimal performance, but if the properties * of each device are added to the DenseTensor with different macro isolation, * the memory layout of the DenseTensor will become more fragmented. diff --git a/paddle/phi/core/tensor_meta.h b/paddle/phi/core/tensor_meta.h index f493e0249d7bf..613ba5f1f7f1f 100644 --- a/paddle/phi/core/tensor_meta.h +++ b/paddle/phi/core/tensor_meta.h @@ -75,7 +75,7 @@ struct TEST_API DenseTensorMeta { bool is_scalar{false}; /// \brief Determine whether using gpudnn speed-up library in the new dygraph. - /// It maybe also support MKLDNN library in the near future. + /// It maybe also support OneDNN library in the near future. bool use_gpudnn{true}; DDim dims; DataType dtype{DataType::UNDEFINED}; diff --git a/paddle/phi/core/visit_type.h b/paddle/phi/core/visit_type.h index ad30da4ddcd6f..03da054450092 100644 --- a/paddle/phi/core/visit_type.h +++ b/paddle/phi/core/visit_type.h @@ -355,7 +355,7 @@ namespace phi { "`"); \ } \ }() -#if defined(PADDLE_WITH_XPU) || defined(PADDLE_WITH_HIP) +#if defined(PADDLE_WITH_XPU) #define PD_VISIT_ALL_TYPES(TYPE, NAME, ...) 
\ [&] { \ const auto& __dtype__ = TYPE; \ diff --git a/paddle/phi/infermeta/fusion.cc b/paddle/phi/infermeta/fusion.cc index b56e7fab0bfe6..e8eb740e453ff 100644 --- a/paddle/phi/infermeta/fusion.cc +++ b/paddle/phi/infermeta/fusion.cc @@ -116,6 +116,20 @@ void AddLayernormXPUInferMeta(const MetaTensor& x, out->share_lod(x); } +void GroupNormalizeSiluXPUInferMeta(const MetaTensor& x, + const MetaTensor& scale, + const MetaTensor& bias, + int groups, + float epsilon, + MetaTensor* out) { + auto x_dims = x.dims(); + auto out_dims = x_dims; + out->set_dims(out_dims); + out->set_dtype(x.dtype()); + out->set_layout(x.layout()); + out->share_lod(x); +} + void FusedMultiTransformerInferMeta( const MetaTensor& x, const std::vector& ln_scales, @@ -568,6 +582,36 @@ void Conv2dXPUInferMeta(const MetaTensor& x, out->set_dtype(out_dtype); } +void SpatialTransformerResblockXPUInferMeta( + const MetaTensor& x, + const std::vector& x_max, + const std::vector& conv_bias, + const std::vector& conv_filter, + const std::vector& conv_filter_max, + const std::vector& gn_bias, + const std::vector& gn_scale, + const std::vector& dilations, + const std::vector& paddings, + const std::vector& strides, + const std::vector& gn_eps, + const std::vector& gn_groups, + const std::vector& groups, + bool conv_fix, + bool has_silu_fc_input, + bool include_silu, + MetaTensor* out, + MetaTensor* out_max) { + auto input_shape = x.dims(); + auto batch_size = input_shape[0]; + auto channel_out = conv_filter[0]->dims()[0]; + auto h = input_shape[2]; + auto w = input_shape[3]; + out->set_dims(common::make_ddim({batch_size, channel_out, h, w})); + out->set_dtype(x.dtype()); + out->set_layout(x.layout()); + out->share_lod(x); +} + void EmbeddingWithEltwiseAddXPUInferMeta( const std::vector& ids, const std::vector& tables, @@ -3032,7 +3076,7 @@ void FusedConv2dAddActInferMeta(const MetaTensor& input, MetaTensor* output, std::vector outputs, MetaConfig config) { - // TODO(liuyuanle): mkldnn seems only support 
nchw. + // TODO(liuyuanle): onednn seems only support nchw. const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); std::vector out_shape = ComputeOutputShape(input, filter, @@ -3731,13 +3775,14 @@ void QKVAttentionXPUInferMeta(const MetaTensor& q, const MetaTensor& q_max, const MetaTensor& k_max, const MetaTensor& v_max, + const MetaTensor& qk_max, + const MetaTensor& qkv_max, float alpha, int head_num, int head_dim, bool qkv_fc_fusion, DataType out_dtype, - MetaTensor* qkv, - MetaTensor* qkv_max) { + MetaTensor* qkv) { auto q_dims = q.dims(); auto k_dims = k.dims(); auto v_dims = v.dims(); @@ -3781,9 +3826,6 @@ void QKVAttentionXPUInferMeta(const MetaTensor& q, qkv->set_dims(phi::make_ddim({q_dims[0], q_dims[1], head_num * head_dim})); qkv->set_dtype(out_dtype); qkv->set_layout(q.layout()); - qkv_max->set_dims(phi::make_ddim({6})); - qkv_max->set_dtype(out_dtype); - qkv_max->set_layout(q.layout()); } void SinePosXPUInferMeta(const MetaTensor& x, const MetaTensor& y, @@ -3816,6 +3858,95 @@ void SinePosXPUInferMeta(const MetaTensor& x, out->set_dtype(x.dtype()); } +void CrossAttentionXPUInferMeta( + const MetaTensor& input_q, + const MetaTensor& input_kv, + const std::vector& fc_weight, + const std::vector& fc_weight_max, + const std::vector& fc_bias, + const MetaTensor& mask, + int head_num, + int head_dim, + float alpha, + DataType out_dtype, + MetaTensor* qkv, + MetaTensor* qkv_max) { + auto input_q_dims = input_q.dims(); + auto input_kv_dims = input_kv.dims(); + auto mask_dims = mask.dims(); + // input shape : {B, L, H*D} + PADDLE_ENFORCE_EQ(input_q_dims.size(), + 3, + phi::errors::InvalidArgument( + "The dim of input_q should be 3! But received ", + input_q_dims.size())); + PADDLE_ENFORCE_EQ(input_kv_dims.size(), + 3, + phi::errors::InvalidArgument( + "The dim of input_kv should be 3! 
But received ", + input_kv_dims.size())); + // sequence length of q and k/v not required to be equal + // but batch size and dim should be the same + PADDLE_ENFORCE_EQ( + input_q_dims[0], + input_kv_dims[0], + phi::errors::InvalidArgument("The batch size of input_q and input_kv " + "should be the same! Received ", + input_q_dims[0], + " vs ", + input_kv_dims[0])); + PADDLE_ENFORCE_EQ( + input_q_dims[2], + input_kv_dims[2], + phi::errors::InvalidArgument("The hidden_dim of input_q and input_kv " + "should be the same! Received ", + input_q_dims[2], + " vs ", + input_kv_dims[2])); + int hidden_dim = head_num * head_dim; + PADDLE_ENFORCE_EQ( + input_q_dims[2], + hidden_dim, + phi::errors::InvalidArgument( + "The last dimension of input_q should be [H*D]! Received ", + input_q_dims[2], + " != expected ", + hidden_dim)); + PADDLE_ENFORCE_EQ(fc_weight.size(), + 3, + phi::errors::InvalidArgument( + "The size of fc_weight should be 3! But received ", + fc_weight.size())); + PADDLE_ENFORCE_EQ(fc_weight_max.size(), + 3, + phi::errors::InvalidArgument( + "The size of fc_weight_max should be 3! But received ", + fc_weight_max.size())); + PADDLE_ENFORCE_EQ( + fc_bias.size(), + 3, + phi::errors::InvalidArgument( + "The size of fc_bias should be 3! 
But received ", fc_bias.size())); + PADDLE_ENFORCE_LE( + mask_dims.size(), + 4, + phi::errors::InvalidArgument( + "The dim of mask should be not greater than 4!", mask_dims.size())); + + // output shape: {B, qL, H*D} + qkv->set_dims( + phi::make_ddim({input_q_dims[0], input_q_dims[1], head_num * head_dim})); + qkv->set_dtype(out_dtype); + qkv->set_layout(input_q.layout()); + // TODO(Terry) optimize the max value num + // unable to pass few PR-CIs, so just use a constant value + // int xpu2_max_value_num = phi::backends::xpu::get_xpu_max_ptr_size(-1); + const int xpu2_max_value_num = 6; + qkv_max->set_dims(phi::make_ddim({xpu2_max_value_num})); + qkv_max->set_dtype(out_dtype); + qkv_max->set_layout(input_q.layout()); +} + void MultiGruInferMeta( const MetaTensor& x, const std::vector& weight_x, diff --git a/paddle/phi/infermeta/fusion.h b/paddle/phi/infermeta/fusion.h index 0a7224e39f73b..632a656414b4f 100644 --- a/paddle/phi/infermeta/fusion.h +++ b/paddle/phi/infermeta/fusion.h @@ -70,6 +70,13 @@ void AddLayernormXPUInferMeta(const MetaTensor& x, float epsilon, MetaTensor* out); +void GroupNormalizeSiluXPUInferMeta(const MetaTensor& x, + const MetaTensor& scale, + const MetaTensor& bias, + int groups, + float epsilon, + MetaTensor* out); + void BlockMultiheadAttentionInferMeta(const MetaTensor& qkv, const MetaTensor& key_cache, const MetaTensor& value_cache, @@ -145,6 +152,26 @@ void Conv2dXPUInferMeta(const MetaTensor& x, MetaTensor* out, MetaTensor* out_max); +void SpatialTransformerResblockXPUInferMeta( + const MetaTensor& x, + const std::vector& x_max, + const std::vector& conv_bias, + const std::vector& conv_filter, + const std::vector& conv_filter_max, + const std::vector& gn_bias, + const std::vector& gn_scale, + const std::vector& dilations, + const std::vector& paddings, + const std::vector& strides, + const std::vector& gn_eps, + const std::vector& gn_groups, + const std::vector& groups, + bool conv_fix, + bool has_silu_fc_input, + bool include_silu, + 
MetaTensor* out, + MetaTensor* out_max); + void EmbeddingWithEltwiseAddXPUInferMeta( const std::vector& ids, const std::vector& tables, @@ -862,13 +889,14 @@ void QKVAttentionXPUInferMeta(const MetaTensor& q, const MetaTensor& q_max, const MetaTensor& k_max, const MetaTensor& v_max, + const MetaTensor& qk_max, + const MetaTensor& qkv_max, float alpha, int head_num, int head_dim, bool qkv_fc_fusion, DataType out_dtype, - MetaTensor* qkv, - MetaTensor* qkv_max); + MetaTensor* qkv); void SinePosXPUInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out); @@ -877,6 +905,19 @@ void RoformerRelativePosXPUInferMeta(const MetaTensor& x, const MetaTensor& cos_emb, int max_pos_len, MetaTensor* out); +void CrossAttentionXPUInferMeta( + const MetaTensor& input_q, + const MetaTensor& input_kv, + const std::vector& fc_weight, + const std::vector& fc_weight_max, + const std::vector& fc_bias, + const MetaTensor& mask, + int head_num, + int head_dim, + float alpha, + DataType out_dtype, + MetaTensor* qkv, + MetaTensor* qkv_max); void MultiGruInferMeta( const MetaTensor& x, diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index a87fdd936b89d..ceebbdb5b2d74 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -4921,10 +4921,10 @@ void MaskedMultiheadAttentionInferMeta(const MetaTensor& x, } } -void FullWithTensorInferMeta(const MetaTensor& shape, +void FullWithTensorInferMeta(const IntArray& shape, DataType dtype, MetaTensor* out) { - out->set_dims(common::make_ddim(std::vector(shape.numel(), -1))); + out->set_dims(common::make_ddim(shape.GetData())); out->set_dtype(dtype); } diff --git a/paddle/phi/infermeta/multiary.h b/paddle/phi/infermeta/multiary.h index 7a94ef98bc993..8d6a366fdbb24 100644 --- a/paddle/phi/infermeta/multiary.h +++ b/paddle/phi/infermeta/multiary.h @@ -952,7 +952,7 @@ void MaskedMultiheadAttentionInferMeta(const MetaTensor& x, MetaTensor* cache_kv_out, MetaTensor* 
beam_cache_offset_out); -void FullWithTensorInferMeta(const MetaTensor& shape, +void FullWithTensorInferMeta(const IntArray& shape, DataType dtype, MetaTensor* out); diff --git a/paddle/phi/infermeta/spmd_rules/rules.cc b/paddle/phi/infermeta/spmd_rules/rules.cc index 9c6492ee75913..d74beb98de74e 100644 --- a/paddle/phi/infermeta/spmd_rules/rules.cc +++ b/paddle/phi/infermeta/spmd_rules/rules.cc @@ -502,6 +502,16 @@ PD_REGISTER_SPMD_RULE( PD_INFER_SPMD(phi::distributed::LayerNormInferSpmd), PD_INFER_SPMD(phi::distributed::LayerNormInferSpmdReverse)); +// fused_rms_norm +// NOTE(ZHIQIU): Temporally register fused_rms_norm rule, +// this is not for rms_norm kernel, but for the custom kernel +// 'fused_rms_norm' in PaddleNLP. +// It will be no longer needed when the PIR-AutoParallel project +// is finished. +PD_REGISTER_SPMD_RULE(fused_rms_norm, + PD_INFER_SPMD(phi::distributed::RmsNormInferSpmd), + PD_INFER_SPMD(phi::distributed::RmsNormInferSpmdReverse)); + PD_REGISTER_SPMD_RULE( flash_attention, PD_INFER_SPMD(phi::distributed::FlashAttInferSpmdStatic), diff --git a/paddle/phi/infermeta/spmd_rules/swiglu.cc b/paddle/phi/infermeta/spmd_rules/swiglu.cc index 924a80c2e39a0..040b8100d8042 100644 --- a/paddle/phi/infermeta/spmd_rules/swiglu.cc +++ b/paddle/phi/infermeta/spmd_rules/swiglu.cc @@ -27,8 +27,14 @@ namespace distributed { SpmdInfo SwiGLUInferSpmd(const DistMetaTensor& x, const DistMetaTensor& y) { // y.dist_attr() is empty means y is None if (y.dist_attr() == TensorDistAttr()) { - PADDLE_THROW( - phi::errors::Unimplemented("The input y is not allowed to be None")); + auto x_dims_mapping = x.dist_attr().dims_mapping(); + if (x_dims_mapping.back() != -1) { + PADDLE_THROW( + phi::errors::Unimplemented("The input y is none and input x's last " + "dim is sharded is not supported")); + } + auto res = ElementwiseUnaryInferSpmd(x); + return {{res.first[0], y.dist_attr()}, {res.second[0]}}; } else { return ElementwiseBinaryInferSpmd(x, y); } @@ -38,8 +44,14 @@ SpmdInfo 
SwiGLUInferSpmdReverse(const DistMetaTensor& x, const DistMetaTensor& y, const DistMetaTensor& out) { if (y.dist_attr() == TensorDistAttr()) { - PADDLE_THROW( - phi::errors::Unimplemented("The input y is not allowed to be None")); + auto x_dims_mapping = x.dist_attr().dims_mapping(); + if (x_dims_mapping.back() != -1) { + PADDLE_THROW( + phi::errors::Unimplemented("The input y is none and input x's last " + "dim is sharded is not supported")); + } + auto res = ElementwiseUnaryInferSpmdReverse(x, out); + return {{res.first[0], y.dist_attr()}, {res.second[0]}}; } else { return ElementwiseBinaryInferSpmdReverse(x, y, out); } @@ -49,8 +61,15 @@ SpmdInfo SwiGLUGradInferSpmd(const DistMetaTensor& x, const DistMetaTensor& y, const DistMetaTensor& out_grad) { if (y.dist_attr() == TensorDistAttr()) { - PADDLE_THROW( - phi::errors::Unimplemented("The input y is not allowed to be None")); + auto x_dims_mapping = x.dist_attr().dims_mapping(); + if (x_dims_mapping.back() != -1) { + PADDLE_THROW( + phi::errors::Unimplemented("The input y is none and input x's last " + "dim is sharded is not supported")); + } + auto res = ElementwiseUnaryGradInferSpmd(x, out_grad); + return {{res.first[0], y.dist_attr(), res.first[1]}, + {res.second[0], y.dist_attr()}}; } else { return ElementwiseBinaryGradInferSpmd(x, y, out_grad); } diff --git a/paddle/phi/infermeta/spmd_rules/tile.cc b/paddle/phi/infermeta/spmd_rules/tile.cc index 76eb0dd95f632..e6d98a1b28303 100644 --- a/paddle/phi/infermeta/spmd_rules/tile.cc +++ b/paddle/phi/infermeta/spmd_rules/tile.cc @@ -151,7 +151,7 @@ SpmdInfo TileInferSpmdReverse(const DistMetaTensor& x, auto x_dist_attr_dst = CopyTensorDistAttrForOutput(x_dist_attr_src); x_dist_attr_dst.set_dims_mapping(x_dims_mapping_dst); - VLOG(4) << "TriuInferSpmdReverse:"; + VLOG(4) << "TileInferSpmdReverse:"; VLOG(4) << "out shape: [" << str_join(out_shape) << "]" << "src_dims_mapping: [" << str_join(out_dist_attr_src.dims_mapping()) diff --git a/paddle/phi/infermeta/ternary.cc 
b/paddle/phi/infermeta/ternary.cc index f10a86b33836a..beba7457039cc 100644 --- a/paddle/phi/infermeta/ternary.cc +++ b/paddle/phi/infermeta/ternary.cc @@ -146,6 +146,25 @@ void AddmmInferMeta(const MetaTensor& input, out->set_dtype(input.dtype()); } +void AssignPosInferMeta(const MetaTensor& x, + const MetaTensor& cum_count, + const MetaTensor& eff_num_len, + MetaTensor* out) { + phi::DataType X_dtype = x.dtype(); + phi::DataType cum_count_dtype = cum_count.dtype(); + + PADDLE_ENFORCE_EQ(cum_count_dtype, + X_dtype, + phi::errors::InvalidArgument( + "The dtype of the cum_count and X should be same")); + PADDLE_ENFORCE_EQ(cum_count_dtype, + phi::DataType::INT64, + phi::errors::InvalidArgument( + "The dtype of the cum_count_dtype, eff_num_len and " + "X should be same as int64")); + out->set_dtype(X_dtype); +} + void BatchFCInferMeta(const MetaTensor& input, const MetaTensor& w, const MetaTensor& bias, @@ -1429,12 +1448,19 @@ void ScatterNdAddInferMeta(const MetaTensor& x, // update.shape = index.shape[:-1] + output.shape[index.shape[-1]:] std::vector r_updates_dims; + bool without_dynamic_shape = true; for (int i = 0; i < index_dims_size - 1; ++i) { + if (index_dims[i] == -1) { + without_dynamic_shape = false; + } r_updates_dims.emplace_back(index_dims[i]); } for (int i = static_cast(index_dims[index_dims_size - 1]); i < ref_dims_size; ++i) { + if (ref_dims[i] == -1) { + without_dynamic_shape = false; + } r_updates_dims.emplace_back(ref_dims[i]); } // check for non-0d updates @@ -1442,25 +1468,27 @@ void ScatterNdAddInferMeta(const MetaTensor& x, r_updates_dims.size(), updates_dims_size, phi::errors::InvalidArgument( - "Updates has wrong shape. The shape of Updates and Input(Updates) " + "Updates has wrong shape. 
The shape of Updates and " + "Input(Updates) " "should be same, but received the shape of Updates is %d, " "the shape of Input(Updates) is %d.", r_updates_dims.size(), updates_dims_size)); - - for (int64_t i = 0; i < updates_dims_size; ++i) { - PADDLE_ENFORCE_EQ( - r_updates_dims[i], - updates_dims[i], - phi::errors::InvalidArgument( - "Updates has wrong shape. The dimensions of Updates and " - "Input(Updates) should match, but received Updates's" - "%d-th dimension is %d, Input(Updates)'s %d-th " - "dimension is %d.", - i, - r_updates_dims[i], - i, - updates_dims[i])); + if (without_dynamic_shape) { + for (int64_t i = 0; i < updates_dims_size; ++i) { + PADDLE_ENFORCE_EQ( + r_updates_dims[i], + updates_dims[i], + phi::errors::InvalidArgument( + "Updates has wrong shape. The dimensions of Updates and " + "Input(Updates) should match, but received Updates's" + "%d-th dimension is %d, Input(Updates)'s %d-th " + "dimension is %d.", + i, + r_updates_dims[i], + i, + updates_dims[i])); + } } } out->set_dims(ref_dims); diff --git a/paddle/phi/infermeta/ternary.h b/paddle/phi/infermeta/ternary.h index c1c1af6f08218..c7c31e767f40f 100644 --- a/paddle/phi/infermeta/ternary.h +++ b/paddle/phi/infermeta/ternary.h @@ -53,6 +53,11 @@ void ArangeTensorInferMeta(const MetaTensor& start, const MetaTensor& step, MetaTensor* out); +void AssignPosInferMeta(const MetaTensor& x, + const MetaTensor& cum_count, + const MetaTensor& eff_num_len, + MetaTensor* out); + void BatchFCInferMeta(const MetaTensor& input, const MetaTensor& w, const MetaTensor& bias, diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt index 304fd3cef793a..891888bf8b585 100644 --- a/paddle/phi/kernels/CMakeLists.txt +++ b/paddle/phi/kernels/CMakeLists.txt @@ -209,11 +209,9 @@ if(WITH_ROCM) "gpu/lu_kernel.cu" "gpu/matrix_rank_kernel.cu" "gpu/matrix_rank_tol_kernel.cu" - "gpu/multiclass_nms3_kernel.cu" "gpu/put_along_axis_grad_kernel.cu" "gpu/put_along_axis_kernel.cu" "gpu/qr_kernel.cu" - 
"gpu/rms_norm_grad_kernel.cu" "gpu/svd_kernel.cu" "gpudnn/mha_cudnn_frontend.cu" "fusion/gpu/block_multi_head_attention_kernel.cu" @@ -239,7 +237,7 @@ set(cc_search_pattern "stride/*.cc" "fusion/cpu/*.cc") -if(WITH_MKLDNN) +if(WITH_ONEDNN) set(cc_search_pattern ${cc_search_pattern} "legacy/onednn/*.cc" "onednn/*.cc" "fusion/onednn/*.cc") endif() @@ -262,6 +260,7 @@ if(NOT AND AVX512F_FOUND AND AVX512F_FLAG AND WITH_MKL)) + list(REMOVE_ITEM kernel_cc "fusion/cpu/fused_layer_norm_avx_kernel.cc") list(REMOVE_ITEM kernel_cc "fusion/cpu/self_dp_attention_kernel.cc") endif() diff --git a/paddle/phi/kernels/cpu/cross_grad_kernel.cc b/paddle/phi/kernels/cpu/cross_grad_kernel.cc index 882c3dd9ee512..4c41107ba0199 100644 --- a/paddle/phi/kernels/cpu/cross_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/cross_grad_kernel.cc @@ -18,6 +18,8 @@ #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" +#include "paddle/phi/kernels/funcs/complex_functors.h" +#include "paddle/phi/kernels/funcs/for_range.h" namespace phi { @@ -81,9 +83,27 @@ void CrossGradKernel(const Context &dev_ctx, slice_size *= static_cast(input_x_dims[i]); } + int64_t numel = x.numel(); + DenseTensor x_conj, y_conj; + DenseTensorMeta meta_xy(x.dtype(), x.dims()); + x_conj.set_meta(meta_xy); + y_conj.set_meta(meta_xy); + + auto *input_x_conj_data = dev_ctx.template Alloc(&x_conj); + + auto *input_y_conj_data = dev_ctx.template Alloc(&y_conj); + + phi::funcs::ForRange for_range(dev_ctx, numel); + phi::funcs::ConjFunctor functor_x( + input_x.data(), numel, input_x_conj_data); + phi::funcs::ConjFunctor functor_y( + input_y.data(), numel, input_y_conj_data); + for_range(functor_x); + for_range(functor_y); + std::vector input_x_vec, input_y_vec, input_dout_vec; - phi::TensorToVector(input_x, dev_ctx, &input_x_vec); - phi::TensorToVector(input_y, dev_ctx, &input_y_vec); + phi::TensorToVector(x_conj, dev_ctx, &input_x_vec); + 
phi::TensorToVector(y_conj, dev_ctx, &input_y_vec); phi::TensorToVector(input_out_grad, dev_ctx, &input_dout_vec); std::vector out_dx_vec(output_x_grad->numel()); std::vector out_dy_vec(output_y_grad->numel()); @@ -120,4 +140,6 @@ PD_REGISTER_KERNEL(cross_grad, float, double, int, - int64_t) {} + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/cpu/cross_kernel.cc b/paddle/phi/kernels/cpu/cross_kernel.cc index 0f45b7c304e31..95f826cfe9132 100644 --- a/paddle/phi/kernels/cpu/cross_kernel.cc +++ b/paddle/phi/kernels/cpu/cross_kernel.cc @@ -105,5 +105,13 @@ void CrossKernel(const Context& dev_ctx, } // namespace phi -PD_REGISTER_KERNEL( - cross, CPU, ALL_LAYOUT, phi::CrossKernel, float, double, int, int64_t) {} +PD_REGISTER_KERNEL(cross, + CPU, + ALL_LAYOUT, + phi::CrossKernel, + float, + double, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/cpu/full_kernel.cc b/paddle/phi/kernels/cpu/full_kernel.cc index b1a6ceda3647d..278b3bea324f1 100644 --- a/paddle/phi/kernels/cpu/full_kernel.cc +++ b/paddle/phi/kernels/cpu/full_kernel.cc @@ -18,7 +18,7 @@ limitations under the License. 
*/ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" -#include "paddle/phi/kernels/impl/full_whit_tensor_kernel_impl.h" +#include "paddle/phi/kernels/impl/full_with_tensor_kernel_impl.h" namespace phi { @@ -156,5 +156,4 @@ PD_REGISTER_KERNEL(full_with_tensor, phi::dtype::complex, phi::dtype::complex) { kernel->InputAt(0).SetBackend(phi::Backend::CPU); - kernel->InputAt(1).SetBackend(phi::Backend::CPU); } diff --git a/paddle/phi/kernels/cpu/isfinite_kernel.cc b/paddle/phi/kernels/cpu/isfinite_kernel.cc index c9f69c5f7e4f5..2fa44670c15c2 100644 --- a/paddle/phi/kernels/cpu/isfinite_kernel.cc +++ b/paddle/phi/kernels/cpu/isfinite_kernel.cc @@ -27,7 +27,10 @@ PD_REGISTER_KERNEL(isinf, phi::dtype::float16, phi::dtype::bfloat16, int, - int64_t) { + int64_t, + int16_t, + int8_t, + uint8_t) { kernel->OutputAt(0).SetDataType(phi::DataType::BOOL); } diff --git a/paddle/phi/kernels/cpu/log_softmax_kernel.cc b/paddle/phi/kernels/cpu/log_softmax_kernel.cc index a57ab908d24ca..26e894945284c 100644 --- a/paddle/phi/kernels/cpu/log_softmax_kernel.cc +++ b/paddle/phi/kernels/cpu/log_softmax_kernel.cc @@ -122,7 +122,7 @@ void LogSoftmaxKernel(const Context& dev_ctx, } // namespace phi -// TODO(YuanRisheng): The layout of mkldnn kernel should be MKLDNN, we should +// TODO(YuanRisheng): The layout of onednn kernel should be OneDNN, we should // support specifying the exact layout when the kernel is registered PD_REGISTER_KERNEL( log_softmax, CPU, ALL_LAYOUT, phi::LogSoftmaxKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/nanmedian_kernel.cc b/paddle/phi/kernels/cpu/nanmedian_kernel.cc index 2911d5c0fcec5..c38cb831d379b 100644 --- a/paddle/phi/kernels/cpu/nanmedian_kernel.cc +++ b/paddle/phi/kernels/cpu/nanmedian_kernel.cc @@ -103,8 +103,12 @@ void CalcMedianFunc(const Context& dev_ctx, offset = i * sort_k; int64_t pos = offset + sort_k - 1; o_ptr[i] = 
sort_out_ptr[pos]; - m_ptr[2 * i] = sort_indices_ptr[pos]; - m_ptr[2 * i + 1] = sort_indices_ptr[pos]; + if (mode == "avg") { + m_ptr[2 * i] = sort_indices_ptr[pos]; + m_ptr[2 * i + 1] = sort_indices_ptr[pos]; + } else { + m_ptr[i] = sort_indices_ptr[pos]; + } } } else { for (i = 0; i < pre_dim; i++) { diff --git a/paddle/phi/kernels/flatten_kernel.h b/paddle/phi/kernels/flatten_kernel.h index b941a1fbb9691..ac53c5b82c6cb 100644 --- a/paddle/phi/kernels/flatten_kernel.h +++ b/paddle/phi/kernels/flatten_kernel.h @@ -40,7 +40,8 @@ void FlattenInferStridedKernel(const Context& dev_ctx, const DenseTensor& x, int start_axis, int stop_axis, - DenseTensor* out); + DenseTensor* out, + DenseTensor* xshape); template void FlattenStridedKernel(const Context& dev_ctx, diff --git a/paddle/phi/kernels/full_kernel.h b/paddle/phi/kernels/full_kernel.h index b10e02658fe75..e6d80ed43dff4 100644 --- a/paddle/phi/kernels/full_kernel.h +++ b/paddle/phi/kernels/full_kernel.h @@ -33,8 +33,8 @@ void FullKernel(const Context& dev_ctx, template void FullWithTensorKernel(const Context& dev_ctx, - const DenseTensor& shape, const DenseTensor& value, + const IntArray& shape, DataType dtype, DenseTensor* out); diff --git a/paddle/phi/kernels/funcs/dropout_impl.cu.h b/paddle/phi/kernels/funcs/dropout_impl.cu.h index 463272a37c00d..855b6fe6c8e15 100644 --- a/paddle/phi/kernels/funcs/dropout_impl.cu.h +++ b/paddle/phi/kernels/funcs/dropout_impl.cu.h @@ -349,19 +349,6 @@ void DropoutFwGPUKernelDriver( } else { bool copy_in_kernel = GetSeedDataAndIncrement( dev_ctx, seed, is_fix_seed, seed_val, offset, &seed_data, &increment); -#ifdef PADDLE_WITH_HIP - VectorizedRandomGenerator - <<>>(0, - size, - seed_data, - dropout_prob, - x_data, - mask_data, - y_data, - upscale_in_train, - increment, - main_offset); -#else const phi::GPUContext* dev_ctx_p = &dev_ctx; auto gen_cuda = dev_ctx.GetGenerator(); auto state_index = gen_cuda->GetStateIndex(); @@ -370,10 +357,11 @@ void DropoutFwGPUKernelDriver( 
parameterSetter = [offset, dev_ctx_p, state_index, is_fix_seed]( phi::backends::gpu::gpuKernelParams& params) { if (!is_fix_seed) { - // we assume seed is null pointer - // seed copy to cpu is meaningless here + // we assume seed is null pointer + // seed copy to cpu is meaningless here +#ifndef PADDLE_WITH_HIP assert(seed_tensor_ptr == nullptr); - +#endif auto gen_cuda = dev_ctx_p->GetGenerator(); // ensure the generator use correct state index gen_cuda->SetStateIndex(state_index); @@ -393,9 +381,14 @@ void DropoutFwGPUKernelDriver( cudaKernelCallback = [=](unsigned int id) { void* functionPtr = reinterpret_cast(&(VectorizedRandomGenerator)); +#ifdef PADDLE_WITH_HIP + hipFunction_t cudaFunc = + reinterpret_cast(functionPtr); +#else cudaFunction_t cudaFunc; PADDLE_ENFORCE_GPU_SUCCESS( cudaGetFuncBySymbol(&cudaFunc, functionPtr)); +#endif VLOG(10) << "[cudaKernelCallback] cudaFunc = " << cudaFunc << " functionPtr = " << functionPtr; @@ -417,7 +410,6 @@ void DropoutFwGPUKernelDriver( VLOG(10) << "NON_CUDA_GRAPH seed = " << seed_data << ", increment = " << increment; -#endif } } else { if (upscale_in_train) { diff --git a/paddle/phi/kernels/funcs/jit/README.en.md b/paddle/phi/kernels/funcs/jit/README.en.md index 0e1958a5c1415..cf661d5468a6c 100644 --- a/paddle/phi/kernels/funcs/jit/README.en.md +++ b/paddle/phi/kernels/funcs/jit/README.en.md @@ -100,4 +100,4 @@ Add more implementations of `your_key` for performance enhancement. 1. Add functions based on generated code in `gen`. It should be derived from `JitCode` and should have corresponding creator from `JitCodeCreator` which will be registered on the `your_key`. 2. If new attribute type is added, you should specialize `JitCodeKey` of this type. -3. Add more functions in `more`,you can use any third party you wish, like mkl, mkldnn or intrinsic code to reach the best performance. +3. Add more functions in `more`,you can use any third party you wish, like mkl, onednn or intrinsic code to reach the best performance. 
diff --git a/paddle/phi/kernels/funcs/layer_norm_impl.cu.h b/paddle/phi/kernels/funcs/layer_norm_impl.cu.h index 6a82875819161..3eee52efcbebe 100644 --- a/paddle/phi/kernels/funcs/layer_norm_impl.cu.h +++ b/paddle/phi/kernels/funcs/layer_norm_impl.cu.h @@ -166,14 +166,14 @@ __inline__ __device__ double rsqrt_(const double val) { return ::rsqrt(val); } -#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__) +#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__) || defined(PADDLE_WITH_HIP) template <> __inline__ __device__ half rsqrt_(const half val) { return hrsqrt(val); } #endif -#ifdef PADDLE_WITH_CUDA +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) template 1) { if (lane == 0) { @@ -290,7 +294,11 @@ __global__ __launch_bounds__(THREADS_PER_CTA) void fast_ln_fwd_kernel( #pragma unroll for (int it = 1; it < THREADS_PER_WARP; it *= 2) { +#ifdef PADDLE_WITH_HIP + var_local += __shfl_xor(var_local, it); +#else var_local += __shfl_xor_sync(uint32_t(-1), var_local, it); +#endif } if (WARPS_N > 1) { @@ -546,7 +554,7 @@ __inline__ __device__ void cuLoadAddStridedInputs(const int64_t i1_block, } } -#ifdef PADDLE_WITH_CUDA +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) template 0; it /= 2) { +#ifdef PADDLE_WITH_HIP + sum_loss1 += __shfl_down(sum_loss1, it); + sum_loss2 += __shfl_down(sum_loss2, it); +#else sum_loss1 += __shfl_down_sync(uint32_t(-1), sum_loss1, it); sum_loss2 += __shfl_down_sync(uint32_t(-1), sum_loss2, it); +#endif } if (lane == 0) { diff --git a/paddle/phi/kernels/fusion/cpu/fused_layer_norm_avx_kernel.cc b/paddle/phi/kernels/fusion/cpu/fused_layer_norm_avx_kernel.cc new file mode 100644 index 0000000000000..62944d7ea3b09 --- /dev/null +++ b/paddle/phi/kernels/fusion/cpu/fused_layer_norm_avx_kernel.cc @@ -0,0 +1,244 @@ +// Copyright (c) 2024 PaddlePaddle Authors All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_utils.h" + +namespace phi { +namespace fusion { + +template +void ResidualBiasSumFunc(const T* x_data, + const T* residual_data, + const T* bias_data, + const float residual_alpha, + const int rows, + const int cols, + const int iStride, + const int oStride, + T* out_data) { + __m512 vresidual_alpha = _mm512_set1_ps(residual_alpha); + const T* pb = bias_data; +#ifdef PADDLE_WITH_MKLML +#pragma omp parallel for +#endif + for (int r = 0; r < rows; ++r) { + const T* px = x_data + r * iStride; + const T* pr = residual_data ? residual_data + r * iStride : nullptr; + T* py = out_data + r * oStride; + for (int col = 0; col < cols; col += 16) { + int remain = cols - col; + __mmask16 mask = (remain >= 16 ? 
0xffff : (1 << remain) - 1); + + // residual*alpha + bias + x + __m512 vx = _mm512_maskz_loadu_ps(mask, px + col); + if (residual_data) { + __m512 residual_vx = _mm512_maskz_loadu_ps(mask, pr + col); + residual_vx = _mm512_mul_ps(residual_vx, vresidual_alpha); + vx = _mm512_mask_add_ps(vx, mask, vx, residual_vx); + } + if (bias_data) { + __m512 vb = _mm512_maskz_loadu_ps(mask, pb + col); + vx = _mm512_mask_add_ps(vx, mask, vx, vb); + } + _mm512_mask_storeu_ps(py + col, mask, vx); + } + } +} + +template +void LayerNormFunc(const T* x_data, + const T* residual_data, + const T* bias_data, + const T* norm_weight_data, + const T* norm_bias_data, + const float epsilon, + const float residual_alpha, + const int rows, + const int cols, + const int iStride, + const int oStride, + T* out_data, + T* residual_out_data, + T* mean_out, + T* var_out) { + auto size = cols; + __m512 vresidual_alpha = _mm512_set1_ps(residual_alpha); + __m512 vgamma = _mm512_set1_ps(1); + __m512 vbeta = _mm512_set1_ps(0); + const T* pb = bias_data; +#ifdef PADDLE_WITH_MKLML +#pragma omp parallel for +#endif + for (int r = 0; r < rows; ++r) { + const T* px = x_data + r * iStride; + const T* pr = residual_data ? residual_data + r * iStride : nullptr; + T* pr_out = residual_out_data ? residual_out_data + r * oStride : nullptr; + T* py = out_data + r * oStride; + + T sum = 0; + T squareSum = 0; + + __m512 vsum = _mm512_set1_ps(0); + __m512 vsqare = _mm512_set1_ps(0); + for (int col = 0; col < size; col += 16) { + int remain = size - col; + __mmask16 mask = (remain >= 16 ? 
0xffff : (1 << remain) - 1); + + // SUM(x) + __m512 vx = _mm512_maskz_loadu_ps(mask, px + col); + if (residual_data) { + __m512 residual_vx = _mm512_maskz_loadu_ps(mask, pr + col); + residual_vx = _mm512_mul_ps(residual_vx, vresidual_alpha); + vx = _mm512_mask_add_ps(vx, mask, vx, residual_vx); + if (bias_data) { + __m512 vb = _mm512_maskz_loadu_ps(mask, pb + col); + vx = _mm512_mask_add_ps(vx, mask, vx, vb); + } + _mm512_mask_storeu_ps(pr_out + col, mask, vx); + } + vsum = _mm512_add_ps(vsum, vx); + + // SUM(x*x) + __m512 tmp = _mm512_mul_ps(vx, vx); + vsqare = _mm512_add_ps(vsqare, tmp); + } + + sum = _mm512_reduce_add_ps(vsum); + squareSum = _mm512_reduce_add_ps(vsqare); + + // Mean + T mean = sum / size; + mean_out[r] = mean; + __m512 vmean = _mm512_set1_ps(mean); + + // Variance + T var = 1 / sqrt(squareSum / size - mean * mean + epsilon); + var_out[r] = var; + __m512 vvar = _mm512_set1_ps(var); + + for (int col = 0; col < size; col += 16) { + int remain = size - col; + __mmask16 mask = (remain >= 16 ? 
0xffff : (1 << remain) - 1); + + __m512 vx = _mm512_maskz_loadu_ps(mask, px + col); + if (residual_data) { + __m512 residual_vx = _mm512_maskz_loadu_ps(mask, pr + col); + residual_vx = _mm512_mul_ps(residual_vx, vresidual_alpha); + vx = _mm512_mask_add_ps(vx, mask, vx, residual_vx); + if (bias_data) { + __m512 vb = _mm512_maskz_loadu_ps(mask, pb + col); + vx = _mm512_mask_add_ps(vx, mask, vx, vb); + } + } + if (norm_weight_data) { + vgamma = _mm512_maskz_loadu_ps(mask, norm_weight_data + col); + } + if (norm_bias_data) { + vbeta = _mm512_maskz_loadu_ps(mask, norm_bias_data + col); + } + // (vx - vmean) * vgamma * vvar + vbeta + vx = _mm512_mask_sub_ps(vx, mask, vx, vmean); + vx = _mm512_mask_mul_ps(vx, mask, vx, vgamma); + vx = _mm512_mask_mul_ps(vx, mask, vx, vvar); + __m512 vy = _mm512_mask_add_ps(vx, mask, vx, vbeta); + _mm512_mask_storeu_ps(py + col, mask, vy); + } + } +} + +template +void FusedLayerNormAvxKernel(const Context& dev_ctx, + const DenseTensor& x, + const paddle::optional& bias, + const paddle::optional& residual, + const paddle::optional& norm_weight, + const paddle::optional& norm_bias, + const float epsilon, + const float residual_alpha, + const int begin_norm_axis, + const float quant_scale, + const int quant_round_type, + const float quant_max_bound, + const float quant_min_bound, + DenseTensor* out, + DenseTensor* residual_out, + DenseTensor* mean, + DenseTensor* variance) { + if (quant_scale > 0.0f) { + PD_THROW("NOT supported quant int8. "); + } + const auto x_dims = x.dims(); + auto matrix_dim = common::flatten_to_2d(x_dims, begin_norm_axis); + T* out_data = dev_ctx.template Alloc(out); + T* mean_out = dev_ctx.template Alloc(mean); + T* var_out = dev_ctx.template Alloc(variance); + + const T* x_data = x.data(); + const T* bias_data = bias ? bias.get().data() : nullptr; + const T* residual_data = residual ? residual.get().data() : nullptr; + const T* norm_weight_data = + norm_weight ? 
norm_weight.get().data() : nullptr; + const T* norm_bias_data = norm_bias ? norm_bias.get().data() : nullptr; + T* residual_out_data = + residual ? dev_ctx.template Alloc(residual_out) : nullptr; + + int32_t rows = static_cast(matrix_dim[0]); + int32_t cols = static_cast(matrix_dim[1]); + + auto iStride = cols; + auto oStride = cols; + if (!norm_weight && !norm_bias_data) { + ResidualBiasSumFunc(x_data, + residual_data, + bias_data, + residual_alpha, + rows, + cols, + iStride, + oStride, + out_data); + } else { + LayerNormFunc(x_data, + residual_data, + bias_data, + norm_weight_data, + norm_bias_data, + epsilon, + residual_alpha, + rows, + cols, + iStride, + oStride, + out_data, + residual_out_data, + mean_out, + var_out); + } +} +} // namespace fusion +} // namespace phi + +PD_REGISTER_KERNEL(fused_bias_residual_layernorm, + CPU, + ALL_LAYOUT, + phi::fusion::FusedLayerNormAvxKernel, + float) {} diff --git a/paddle/phi/kernels/fusion/cpu/self_dp_attention_kernel.cc b/paddle/phi/kernels/fusion/cpu/self_dp_attention_kernel.cc index 0d3189187351c..dff41e6d4250c 100644 --- a/paddle/phi/kernels/fusion/cpu/self_dp_attention_kernel.cc +++ b/paddle/phi/kernels/fusion/cpu/self_dp_attention_kernel.cc @@ -257,7 +257,9 @@ void softmax_sum_max(float* AB, __mmask16 mask = (remain >= 16 ? 0xffff : (1 << remain) - 1); __m512 vx = _mm512_maskz_loadu_ps(mask, buf + off); - vx = vexp(vx * vrefac - vmax); + vx = _mm512_mask_mul_ps(vx, mask, vx, vrefac); + vx = _mm512_mask_sub_ps(vx, mask, vx, vmax); + vx = vexp(vx); _mm512_mask_storeu_ps(buf + off, mask, vx); @@ -275,8 +277,7 @@ void softmax_sum_max(float* AB, __mmask16 mask = (remain >= 16 ? 0xffff : (1 << remain) - 1); __m512 vx = _mm512_maskz_loadu_ps(mask, buf + off); - vx = vx * vrsum; - + vx = _mm512_mask_mul_ps(vx, mask, vx, vrsum); _mm512_mask_storeu_ps(buf + off, mask, vx); } } @@ -301,7 +302,10 @@ void update_out_blk(float* output, __mmask16 mask = (remain >= 16 ? 
0xffff : (1 << remain) - 1); __m512 vout = _mm512_maskz_loadu_ps(mask, outbuf + off); __m512 vabc = _mm512_maskz_loadu_ps(mask, buf + off); - __m512 vupt = vout * merr * vfac + vabc; + vout = _mm512_mask_mul_ps(vout, mask, vout, merr); + vout = _mm512_mask_mul_ps(vout, mask, vout, vfac); + __m512 vupt = _mm512_set1_ps(0.0f); + vupt = _mm512_mask_add_ps(vupt, mask, vout, vabc); _mm512_mask_storeu_ps(outbuf + off, mask, vupt); } pre_sum[i] = sum[i]; diff --git a/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_grad_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_grad_kernel.cu index 60a82cfe7c198..48819c12a8dc0 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_grad_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_grad_kernel.cu @@ -11,7 +11,12 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef PADDLE_WITH_HIP +#ifdef PADDLE_WITH_HIP +#include +#include +#include +namespace cub = hipcub; +#else #include #include #endif @@ -21,9 +26,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/layer_norm_impl.cu.h" -#ifndef PADDLE_WITH_HIP #include "paddle/phi/kernels/fusion/gpu/fused_dropout_helper.h" -#endif namespace phi { namespace fusion { @@ -51,7 +54,6 @@ void FusedBiasDropoutResidualLnGradKernel( DenseTensor* bias_grad, DenseTensor* ln_scale_grad, DenseTensor* ln_bias_grad) { -#ifndef PADDLE_WITH_HIP using U = LayerNormParamType; auto* d_y_data = y_grad.data(); auto* ln_scale_data = @@ -114,15 +116,19 @@ void FusedBiasDropoutResidualLnGradKernel( d_x_data, d_bias_data, d_residual_data); -#else - PADDLE_THROW(phi::errors::Unimplemented( - "FusedBiasDropoutResidualLnGradKernel not surpport for rocm")); -#endif } } // namespace fusion } // namespace phi +#ifdef PADDLE_WITH_HIP +PD_REGISTER_KERNEL(fused_bias_dropout_residual_layer_norm_grad, + GPU, + ALL_LAYOUT, + phi::fusion::FusedBiasDropoutResidualLnGradKernel, + float, + phi::dtype::float16) {} +#else PD_REGISTER_KERNEL(fused_bias_dropout_residual_layer_norm_grad, GPU, ALL_LAYOUT, @@ -130,3 +136,4 @@ PD_REGISTER_KERNEL(fused_bias_dropout_residual_layer_norm_grad, float, double, phi::dtype::float16) {} +#endif diff --git a/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu index 37450d3a4e178..ca0bcbe7f2466 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu @@ -17,9 +17,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/layer_norm_impl.cu.h" -#ifndef PADDLE_WITH_HIP #include 
"paddle/phi/kernels/fusion/gpu/fused_dropout_helper.h" -#endif namespace phi { namespace fusion { @@ -42,7 +40,6 @@ void FusedBiasDropoutResidualLnKernel( DenseTensor* dropout_mask_out, DenseTensor* ln_mean, DenseTensor* ln_variance) { -#ifndef PADDLE_WITH_HIP using U = phi::funcs::LayerNormParamType; auto* x_data = x.data(); auto* bias_data = (bias.get_ptr() == nullptr) ? nullptr : bias->data(); @@ -95,14 +92,20 @@ void FusedBiasDropoutResidualLnKernel( y_data, ln_mean_data, ln_var_data); -#else - PADDLE_THROW(phi::errors::Unimplemented( - "FusedBiasDropoutResidualLnKernel not support for rocm")); -#endif } } // namespace fusion } // namespace phi +#ifdef PADDLE_WITH_HIP +PD_REGISTER_KERNEL(fused_bias_dropout_residual_layer_norm, + GPU, + ALL_LAYOUT, + phi::fusion::FusedBiasDropoutResidualLnKernel, + float, + phi::dtype::float16) { + kernel->OutputAt(1).SetDataType(phi::DataType::UINT8); +} +#else PD_REGISTER_KERNEL(fused_bias_dropout_residual_layer_norm, GPU, ALL_LAYOUT, @@ -112,3 +115,4 @@ PD_REGISTER_KERNEL(fused_bias_dropout_residual_layer_norm, phi::dtype::float16) { kernel->OutputAt(1).SetDataType(phi::DataType::UINT8); } +#endif diff --git a/paddle/phi/kernels/fusion/gpu/fused_dropout_act_bias.h b/paddle/phi/kernels/fusion/gpu/fused_dropout_act_bias.h index e5f5c9ba50ba4..d2cd2f1b545a7 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_dropout_act_bias.h +++ b/paddle/phi/kernels/fusion/gpu/fused_dropout_act_bias.h @@ -35,7 +35,11 @@ struct GeluFunctor { template struct FastGeluFunctor { inline __device__ T operator()(const T x) const { +#ifdef PADDLE_WITH_HIP + assert(0 && "ROCM does not support FastGelu"); +#else return phi::GeluFwd(x); +#endif } }; @@ -92,8 +96,8 @@ __global__ void FusedDropoutActBias( int row_id = blockIdx.y; int idx = row_id * cols + col_id; - curandStatePhilox4_32_10_t state; - curand_init(seed, idx, increment, &state); + GPURAND(StatePhilox4_32_10_t) state; + GPURAND(_init)(seed, idx, increment, &state); const T factor = 
phi::fusion::GetFactor(dropout_prob, is_upscale_in_train, is_test); diff --git a/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu index 801f070251fb2..8994d52138233 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu @@ -202,18 +202,6 @@ void FusedDropoutAddGradKernel(const Context& dev_ctx, ? NoMaskBwFunctor(1.0f - dropout_rate) : NoMaskBwFunctor(1.0f - dropout_rate, 1.0f); -#ifdef PADDLE_WITH_HIP - VectorizedDropoutBackward> - <<>>(0, - numel, - seed_data, // idx: 2 need save - x_grad_data, - y_grad_data, - out_grad_data, - increment, // idx: 6 need save - main_offset, - functor); -#else // we assume seed/offset is same across iterations // seed_offset_data should preserved by cudaGraph pool const phi::GPUContext* dev_ctx_p = &dev_ctx; @@ -233,9 +221,13 @@ void FusedDropoutAddGradKernel(const Context& dev_ctx, cudaKernelCallback = [=](unsigned int id) { void* functionPtr = reinterpret_cast( &(VectorizedDropoutBackward>)); +#ifdef PADDLE_WITH_HIP + hipFunction_t cudaFunc = reinterpret_cast(functionPtr); +#else cudaFunction_t cudaFunc; PADDLE_ENFORCE_GPU_SUCCESS( cudaGetFuncBySymbol(&cudaFunc, functionPtr)); +#endif VLOG(10) << "[cudaKernelCallback] cudaFunc = " << cudaFunc << " functionPtr = " << functionPtr; @@ -257,7 +249,6 @@ void FusedDropoutAddGradKernel(const Context& dev_ctx, VLOG(10) << "NON_CUDA_GRAPH seed = " << seed_data << ", increment = " << increment; -#endif } } diff --git a/paddle/phi/kernels/fusion/gpu/fused_dropout_add_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_dropout_add_kernel.cu index c95c5fbf0ca3d..54ec3604bbee9 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_dropout_add_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_dropout_add_kernel.cu @@ -186,18 +186,6 @@ void FusedDropoutAddKernel(const Context& dev_ctx, auto dst_functor = NoMaskFwFunctor(1.0f - 
dropout_rate, upscale_in_train); -#ifdef PADDLE_WITH_HIP - VectorizedDropoutForward> - <<>>(0, - numel, - seed_data, // need save - x_data, - y_data, - out_data, - increment, // need save - main_offset, - dst_functor); -#else // we assume seed/offset is same across iterations // seed_offset_data should preserved by cudaGraph pool const phi::GPUContext* dev_ctx_p = &dev_ctx; @@ -237,9 +225,13 @@ void FusedDropoutAddKernel(const Context& dev_ctx, cudaKernelCallback = [=](unsigned int id) { void* functionPtr = reinterpret_cast( &(VectorizedDropoutForward>)); +#ifdef PADDLE_WITH_HIP + hipFunction_t cudaFunc = reinterpret_cast(functionPtr); +#else cudaFunction_t cudaFunc; PADDLE_ENFORCE_GPU_SUCCESS( cudaGetFuncBySymbol(&cudaFunc, functionPtr)); +#endif VLOG(10) << "[cudaKernelCallback] cudaFunc = " << cudaFunc << " functionPtr = " << functionPtr; @@ -260,7 +252,6 @@ void FusedDropoutAddKernel(const Context& dev_ctx, VLOG(10) << "NON_CUDA_GRAPH seed = " << seed_data << ", increment = " << increment; -#endif } else { using MT = typename phi::dtype::MPTypeTrait::Type; MT factor = static_cast(1.0f - dropout_rate); diff --git a/paddle/phi/kernels/fusion/gpu/fused_dropout_common.h b/paddle/phi/kernels/fusion/gpu/fused_dropout_common.h index 2ef46378b1b9b..ef9ecbb435fdb 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_dropout_common.h +++ b/paddle/phi/kernels/fusion/gpu/fused_dropout_common.h @@ -20,10 +20,25 @@ limitations under the License. 
*/ #include #endif +#ifdef PADDLE_WITH_HIP +#include +#include +#include +#include +#endif + #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/kernels/funcs/aligned_vector.h" #include "paddle/phi/kernels/funcs/layer_norm_impl.cu.h" +#ifdef PADDLE_WITH_HIP +#define GPU(str) hip##str +#define GPURAND(str) hiprand##str +#else +#define GPU(str) cuda##str +#define GPURAND(str) curand##str +#endif + namespace phi { namespace fusion { @@ -63,26 +78,29 @@ inline phi::backends::gpu::GpuLaunchConfig Get1DBlocksAnd2DGrids( } template -__forceinline__ __device__ void RandVec(curandStatePhilox4_32_10_t *state, +__forceinline__ __device__ void RandVec(GPURAND(StatePhilox4_32_10_t) * state, float *data); template <> -__forceinline__ __device__ void RandVec<1>(curandStatePhilox4_32_10_t *state, +__forceinline__ __device__ void RandVec<1>(GPURAND(StatePhilox4_32_10_t) * + state, float *data) { - data[0] = curand_uniform(state); + data[0] = GPURAND(_uniform)(state); } template <> -__forceinline__ __device__ void RandVec<2>(curandStatePhilox4_32_10_t *state, +__forceinline__ __device__ void RandVec<2>(GPURAND(StatePhilox4_32_10_t) * + state, float *data) { - data[0] = curand_uniform(state); - data[1] = curand_uniform(state); + data[0] = GPURAND(_uniform)(state); + data[1] = GPURAND(_uniform)(state); } template <> -__forceinline__ __device__ void RandVec<4>(curandStatePhilox4_32_10_t *state, +__forceinline__ __device__ void RandVec<4>(GPURAND(StatePhilox4_32_10_t) * + state, float *data) { - float4 rand4 = curand_uniform4(state); + float4 rand4 = GPURAND(_uniform4)(state); data[0] = rand4.x; data[1] = rand4.y; data[2] = rand4.w; @@ -90,7 +108,8 @@ __forceinline__ __device__ void RandVec<4>(curandStatePhilox4_32_10_t *state, } template <> -__forceinline__ __device__ void RandVec<8>(curandStatePhilox4_32_10_t *state, +__forceinline__ __device__ void RandVec<8>(GPURAND(StatePhilox4_32_10_t) * + state, float *data) { RandVec<4>(state, data); RandVec<4>(state, 
data + 4); @@ -99,7 +118,7 @@ __forceinline__ __device__ void RandVec<8>(curandStatePhilox4_32_10_t *state, template inline void SetZero(const phi::GPUContext &ctx, T *ptr, const size_t size) { PADDLE_ENFORCE_GPU_SUCCESS( - cudaMemsetAsync(ptr, 0, size * sizeof(T), ctx.stream())); + GPU(MemsetAsync)(ptr, 0, size * sizeof(T), ctx.stream())); } /** diff --git a/paddle/phi/kernels/fusion/gpu/fused_layernorm_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_layernorm_kernel.cu index e31b24e7e105e..221019531a548 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_layernorm_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_layernorm_kernel.cu @@ -38,10 +38,19 @@ limitations under the License. #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/core/kernel_registry.h" -#ifndef PADDLE_WITH_HIP -#include #include "paddle/phi/kernels/fusion/gpu/attention_layer.norm.h" #include "paddle/phi/kernels/fusion/gpu/fused_dropout_helper.h" +#ifdef PADDLE_WITH_HIP +#include +#include +#include +namespace cub = hipcub; +#define GPU(str) hip##str +#define GPUMultiProcessorCount hipDeviceAttributeMultiprocessorCount +#else +#include +#define GPU(str) cuda##str +#define GPUMultiProcessorCount cudaDevAttrMultiProcessorCount #endif namespace phi { @@ -50,9 +59,11 @@ namespace fusion { namespace { -#ifndef PADDLE_WITH_HIP - +#ifdef PADDLE_WITH_HIP +constexpr int kWarpSize = 64; +#else constexpr int kWarpSize = 32; +#endif template struct SumOp { @@ -74,7 +85,11 @@ template