diff --git a/.gitignore b/.gitignore index 09a9677680..e5d951cc20 100644 --- a/.gitignore +++ b/.gitignore @@ -131,3 +131,7 @@ work_dirs/ # the generated header files /tests/test_csrc/test_define.h + +# +!docs/zh_cn/build +!docs/en/build
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 029a2dcd69..1540e469de 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -46,3 +46,9 @@ repos: hooks: - id: docformatter args: ["--in-place", "--wrap-descriptions", "79"] + + - repo: https://github.com/open-mmlab/pre-commit-hooks + rev: v0.2.0 + hooks: + - id: check-copyright + args: ["csrc", "mmdeploy", "tests", "demo", "tools"]
diff --git a/CMakeLists.txt b/CMakeLists.txt index 2f8f8e4672..3ae98d745b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,7 +11,11 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) +if (MSVC) + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) +else () + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) +endif () set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) # options @@ -39,12 +43,20 @@ endif () # notice that ubsan has linker issues for ubuntu < 18.04, see # https://stackoverflow.com/questions/50024731/ld-unrecognized-option-push-state-no-as-needed if (MMDEPLOY_UBSAN_ENABLE) - add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-fsanitize=undefined>) - add_link_options(-fsanitize=undefined) + add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-fsanitize=undefined>) + add_link_options(-fsanitize=undefined) +endif () + +if (MSVC) + add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/diagnostics:classic>) + add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/Zc:preprocessor>) # /experimental:preprocessor on VS2017 + add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/wd4251>) +else () + add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-fvisibility=hidden>) endif () -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) -# set INTERFACE target to gather linked modules +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) + add_library(MMDeployStaticModules INTERFACE) add_library(MMDeployDynamicModules INTERFACE) add_library(MMDeployLibs INTERFACE) @@ -52,25 +64,10 @@ add_library(MMDeployLibs INTERFACE) add_subdirectory(csrc) if (MMDEPLOY_BUILD_SDK) - # get static modules and dynamic modules from ${MMDeployStaticModules} and ${MMDeployDynamicModules}, respectively - set(STATIC_MODULES) - get_target_property(STATIC_MODULES MMDeployStaticModules INTERFACE_LINK_LIBRARIES) - get_target_list("${STATIC_MODULES}" FILTERED_MODULES) - set(MMDEPLOY_STATIC_MODULES "${FILTERED_MODULES}" CACHE STRING "MMDeploy's static modules") - message(STATUS "MMDEPLOY_STATIC_MODULES: ${MMDEPLOY_STATIC_MODULES}") - - set(DYNAMIC_MODULES) - get_target_property(DYNAMIC_MODULES MMDeployDynamicModules INTERFACE_LINK_LIBRARIES) - get_target_list("${DYNAMIC_MODULES}" FILTERED_MODULES) - set(MMDEPLOY_DYNAMIC_MODULES "${FILTERED_MODULES}" CACHE STRING "MMDeploy's dynamic modules") - message(STATUS "MMDEPLOY_DYNAMIC_MODULES: ${MMDEPLOY_DYNAMIC_MODULES}") - - # get libs from ${MMDeployLibs} - set(LIBS) - get_target_property(LIBS MMDeployLibs INTERFACE_LINK_LIBRARIES) - get_target_list("${LIBS}" FILTERED_LIBS) - set(MMDEPLOY_LIBS "${FILTERED_LIBS}" CACHE STRING "MMDeploy's libs that can be linked directly by application") - message(STATUS "MMDEPLOY_LIBS: ${MMDEPLOY_LIBS}") + install(TARGETS MMDeployStaticModules + MMDeployDynamicModules + MMDeployLibs + EXPORT MMDeployTargets) if (MMDEPLOY_BUILD_TEST) add_subdirectory(tests/test_csrc) @@ -78,13 +75,11 @@ if (MMDEPLOY_BUILD_SDK) if
(MMDEPLOY_BUILD_SDK_PYTHON_API) add_subdirectory(csrc/apis/python) - endif() + endif () # export MMDeploy package install(EXPORT MMDeployTargets - # NAMESPACE mmdeploy:: FILE MMDeployTargets.cmake - #EXPORT_LINK_INTERFACE_LIBRARIES DESTINATION lib/cmake/MMDeploy) include(CMakePackageConfigHelpers) @@ -105,6 +100,8 @@ if (MMDEPLOY_BUILD_SDK) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/MMDeployConfig.cmake ${CMAKE_CURRENT_BINARY_DIR}/MMDeployConfigVersion.cmake + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/MMDeploy.cmake + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/loader.cpp.in DESTINATION lib/cmake/MMDeploy ) diff --git a/README.md b/README.md index ab05a73ad0..37a10deff3 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,7 @@ a part of the [OpenMMLab](https://openmmlab.com/) project. - [x] MMSegmentation - [x] MMEditing - [x] MMOCR + - [x] MMPose - **Multiple inference backends are available** @@ -111,20 +112,21 @@ If you find this project useful in your research, please consider cite: ## Projects in OpenMMLab - [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab foundational library for computer vision. -- [MIM](https://github.com/open-mmlab/mim): MIM Installs OpenMMLab Packages. +- [MIM](https://github.com/open-mmlab/mim): MIM installs OpenMMLab packages. - [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab image classification toolbox and benchmark. - [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark. - [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab's next-generation platform for general 3D object detection. +- [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab rotated object detection toolbox and benchmark. - [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab semantic segmentation toolbox and benchmark. +- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab text detection, recognition, and understanding toolbox. +- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab pose estimation toolbox and benchmark. +- [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 3D human parametric model toolbox and benchmark. +- [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab self-supervised learning toolbox and benchmark. +- [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab model compression toolbox and benchmark. +- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab fewshot learning toolbox and benchmark. - [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab's next-generation action understanding toolbox and benchmark. - [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab video perception toolbox and benchmark. -- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab pose estimation toolbox and benchmark. +- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab optical flow toolbox and benchmark. - [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab image and video editing toolbox. -- [MMOCR](https://github.com/open-mmlab/mmocr): A Comprehensive Toolbox for Text Detection, Recognition and Understanding. - [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab image and video generative models toolbox. -- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab optical flow toolbox and benchmark. -- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab FewShot Learning Toolbox and Benchmark. 
-- [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab Human Pose and Shape Estimation Toolbox and Benchmark. -- [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab self-supervised learning Toolbox and Benchmark. -- [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab Model Compression Toolbox and Benchmark. -- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab Model Deployment Framework. +- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab model deployment framework. diff --git a/README_zh-CN.md b/README_zh-CN.md index 3ccf14e8af..12a6c4c5e3 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -46,6 +46,7 @@ MMDeploy 是一个开源深度学习模型部署工具箱,它是 [OpenMMLab](h - [x] MMSegmentation - [x] MMEditing - [x] MMOCR + - [x] MMPose - **支持多种推理后端** @@ -114,18 +115,19 @@ MMDeploy 是一个开源深度学习模型部署工具箱,它是 [OpenMMLab](h - [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab 图像分类工具箱 - [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab 目标检测工具箱 - [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab 新一代通用 3D 目标检测平台 +- [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab 旋转框检测工具箱与测试基准 - [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab 语义分割工具箱 -- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab 新一代视频理解工具箱 -- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab 一体化视频目标感知平台 -- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab 姿态估计工具箱 -- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab 图像视频编辑工具箱 - [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab 全流程文字检测识别理解工具包 -- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab 图片视频生成模型工具箱 -- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab 光流估计工具箱与测试基准 -- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab 少样本学习工具箱与测试基准 +- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab 姿态估计工具箱 - [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 人体参数化模型工具箱与测试基准 - [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab 自监督学习工具箱与测试基准 - [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab 模型压缩工具箱与测试基准 +- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab 少样本学习工具箱与测试基准 +- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab 新一代视频理解工具箱 +- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab 一体化视频目标感知平台 +- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab 光流估计工具箱与测试基准 +- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab 图像视频编辑工具箱 +- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab 图片视频生成模型工具箱 - [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab 模型部署框架 ## 欢迎加入 OpenMMLab 社区 diff --git a/cmake/MMDeploy.cmake b/cmake/MMDeploy.cmake new file mode 100644 index 0000000000..086b45681f --- /dev/null +++ b/cmake/MMDeploy.cmake @@ -0,0 +1,151 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+ +function (mmdeploy_export NAME) + set(_LIB_DIR lib) + if (MSVC) + set(_LIB_DIR bin) + endif () + install(TARGETS ${NAME} + EXPORT MMDeployTargets + ARCHIVE DESTINATION lib + LIBRARY DESTINATION ${_LIB_DIR} + RUNTIME DESTINATION bin) +endfunction () + + +function (mmdeploy_add_library NAME) + cmake_parse_arguments(_MMDEPLOY "EXCLUDE" "" "" ${ARGN}) + add_library(${NAME} ${_MMDEPLOY_UNPARSED_ARGUMENTS}) + target_compile_definitions(${NAME} PRIVATE -DMMDEPLOY_API_EXPORTS=1) + get_target_property(_TYPE ${NAME} TYPE) + if (_TYPE STREQUAL STATIC_LIBRARY) + set_target_properties(${NAME} PROPERTIES POSITION_INDEPENDENT_CODE 1) + elseif (_TYPE STREQUAL SHARED_LIBRARY) + else () + message(FATAL_ERROR "unsupported type: ${_TYPE}") + endif () + if (NOT _MMDEPLOY_EXCLUDE) + target_link_libraries(MMDeployLibs INTERFACE ${NAME}) + mmdeploy_export(${NAME}) + endif () +endfunction () + + +function (mmdeploy_add_module NAME) + # EXCLUDE: exclude from registering & exporting as SDK module + # LIBRARY: the module is also a library (add_library with SHARED instead of MODULE) + cmake_parse_arguments(_MMDEPLOY "EXCLUDE;LIBRARY" "" "" ${ARGN}) + # search for add_library keywords + cmake_parse_arguments(_KW "STATIC;SHARED;MODULE" "" "" ${_MMDEPLOY_UNPARSED_ARGUMENTS}) + + set(_MAYBE_MODULE) + # no library type specified + if (NOT (_KW_STATIC OR _KW_SHARED OR _KW_MODULE)) + # shared but not marked as a library, build module library so that no .lib dependency + # will be generated for MSVC + if (MSVC AND BUILD_SHARED_LIBS AND NOT _MMDEPLOY_LIBRARY) + set(_MAYBE_MODULE MODULE) + endif () + endif () + + add_library(${NAME} ${_MAYBE_MODULE} ${_MMDEPLOY_UNPARSED_ARGUMENTS}) + + # automatically link mmdeploy::core if exists + if (TARGET mmdeploy::core) + target_link_libraries(${NAME} PRIVATE mmdeploy::core) + endif () + + # export public symbols when marked as a library + if (_MMDEPLOY_LIBRARY) + target_compile_definitions(${NAME} PRIVATE -DMMDEPLOY_API_EXPORTS=1) + endif () + + get_target_property(_TYPE ${NAME} TYPE) + if (_TYPE STREQUAL STATIC_LIBRARY) + set_target_properties(${NAME} PROPERTIES POSITION_INDEPENDENT_CODE 1) + if (MSVC) + target_link_options(${NAME} INTERFACE "/WHOLEARCHIVE:${NAME}") + endif () + # register static modules + if (NOT _MMDEPLOY_EXCLUDE) + target_link_libraries(MMDeployStaticModules INTERFACE ${NAME}) + endif () + elseif (_TYPE STREQUAL SHARED_LIBRARY OR _TYPE STREQUAL MODULE_LIBRARY) + # register dynamic modules + if (NOT _MMDEPLOY_EXCLUDE) + target_link_libraries(MMDeployDynamicModules INTERFACE ${NAME}) + endif () + else () + message(FATAL_ERROR "unsupported type: ${_TYPE}") + endif () + if (NOT _MMDEPLOY_EXCLUDE) + mmdeploy_export(${NAME}) + endif () +endfunction () + + +function (_mmdeploy_flatten_modules RETVAL) + set(_RETVAL) + foreach (ARG IN LISTS ARGN) + get_target_property(TYPE ${ARG} TYPE) + if (TYPE STREQUAL "INTERFACE_LIBRARY") + get_target_property(LIBS ${ARG} INTERFACE_LINK_LIBRARIES) + if (LIBS) + # pattern for 3.17+ + list(FILTER LIBS EXCLUDE REGEX "^::@") + # pattern for 3.13-3.16 + list(TRANSFORM LIBS REPLACE "(.+)::@.*" "\\1") + list(APPEND _RETVAL ${LIBS}) + endif () + else () + list(APPEND _RETVAL ${ARG}) + endif () + endforeach () + set(${RETVAL} ${_RETVAL} PARENT_SCOPE) +endfunction () + + +function (mmdeploy_load_static NAME) + if (MSVC) + target_link_libraries(${NAME} PRIVATE ${ARGN}) + else () + _mmdeploy_flatten_modules(_MODULE_LIST ${ARGN}) + target_link_libraries(${NAME} PRIVATE + -Wl,--whole-archive + ${_MODULE_LIST} + -Wl,--no-whole-archive) + endif ()
+endfunction () + +function (mmdeploy_load_dynamic NAME) + _mmdeploy_flatten_modules(_MODULE_LIST ${ARGN}) + if (MSVC) + if (NOT _MODULE_LIST) + return () + endif () + # MSVC has nothing like "-Wl,--no-as-needed ... -Wl,--as-needed", as a + # workaround we build a static module which loads the dynamic modules + set(_MODULE_STR ${_MODULE_LIST}) + list(TRANSFORM _MODULE_STR REPLACE "(.+)" "\"\\1\"") + string(JOIN ",\n " _MODULE_STR ${_MODULE_STR}) + set(_MMDEPLOY_DYNAMIC_MODULES ${_MODULE_STR}) + + set(_LOADER_NAME ${NAME}_loader) + + add_dependencies(${NAME} ${_MODULE_LIST}) + + set(_LOADER_PATH ${CMAKE_BINARY_DIR}/${_LOADER_NAME}.cpp) + # ! CMAKE_CURRENT_FUNCTION_LIST_DIR requires cmake 3.17+ + configure_file( + ${CMAKE_CURRENT_FUNCTION_LIST_DIR}/loader.cpp.in + ${_LOADER_PATH}) + + mmdeploy_add_module(${_LOADER_NAME} STATIC EXCLUDE ${_LOADER_PATH}) + mmdeploy_load_static(${NAME} ${_LOADER_NAME}) + else () + target_link_libraries(${NAME} PRIVATE + -Wl,--no-as-needed + ${_MODULE_LIST} + -Wl,--as-needed) + endif () +endfunction () diff --git a/cmake/MMDeployConfig.cmake.in b/cmake/MMDeployConfig.cmake.in index 7a23a9e153..4bd05489e4 100644 --- a/cmake/MMDeployConfig.cmake.in +++ b/cmake/MMDeployConfig.cmake.in @@ -2,23 +2,26 @@ cmake_minimum_required(VERSION 3.14) -include ("${CMAKE_CURRENT_LIST_DIR}/MMDeployTargets.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/MMDeployTargets.cmake") set(MMDEPLOY_CODEBASES @MMDEPLOY_CODEBASES@) set(MMDEPLOY_TARGET_DEVICES @MMDEPLOY_TARGET_DEVICES@) set(MMDEPLOY_TARGET_BACKENDS @MMDEPLOY_TARGET_BACKENDS@) set(MMDEPLOY_BUILD_TYPE @CMAKE_BUILD_TYPE@) -set(MMDEPLOY_STATIC_MODULES @MMDEPLOY_STATIC_MODULES@) -set(MMDEPLOY_DYNAMIC_MODULES @MMDEPLOY_DYNAMIC_MODULES@) set(MMDEPLOY_BUILD_SHARED @BUILD_SHARED_LIBS@) -set(MMDEPLOY_LIBS @MMDEPLOY_LIBS@) if (NOT MMDEPLOY_BUILD_SHARED) if ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES) + set(CMAKE_CUDA_RUNTIME_LIBRARY Shared) + enable_language(CUDA) find_package(pplcv REQUIRED) endif () endif () -set(MMDeploy_LIBS ${MMDEPLOY_LIBS} - -Wl,--no-as-needed ${MMDEPLOY_DYNAMIC_MODULES} -Wl,--as-needed - -Wl,--whole-archive ${MMDEPLOY_STATIC_MODULES} -Wl,--no-whole-archive) +find_package(spdlog REQUIRED) +find_package(OpenCV REQUIRED) + +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) + +include("${CMAKE_CURRENT_LIST_DIR}/MMDeploy.cmake") diff --git a/cmake/common.cmake b/cmake/common.cmake deleted file mode 100644 index fae162bfe2..0000000000 --- a/cmake/common.cmake +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -function(set_targets PROJECT_NAME OBJ_TARGET STATIC_TARGET SHARED_TARGET) - set(${OBJ_TARGET} ${PROJECT_NAME}_obj PARENT_SCOPE) - set(${STATIC_TARGET} ${PROJECT_NAME}_static PARENT_SCOPE) - set(${SHARED_TARGET} ${PROJECT_NAME} PARENT_SCOPE) -endfunction() - -function(install_targets TARGET_NAMES) - foreach (TARGET_NAME ${TARGET_NAMES}) - install(TARGETS ${TARGET_NAME} - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin - ) - endforeach () -endfunction() - -function(build_target TARGET_NAME TARGET_SRCS) - add_library(${TARGET_NAME} ${TARGET_SRCS}) - set_target_properties(${TARGET_NAME} PROPERTIES POSITION_INDEPENDENT_CODE 1) -endfunction() - -# When the object target ${TARGET_NAME} has more than one source file, -# "${SRCS_VARIABLE}" MUST be passed to ${TARGET_SRCS}. The quotation marks CANNOT be dismissed. 
-function(build_object_target TARGET_NAME TARGET_SRCS) - add_library(${TARGET_NAME} OBJECT) - target_sources(${TARGET_NAME} PRIVATE ${TARGET_SRCS}) - set_target_properties(${TARGET_NAME} PROPERTIES POSITION_INDEPENDENT_CODE 1) -endfunction() - -function(build_static_target TARGET_NAME OBJECT_TARGET LINK_TYPE) - add_library(${TARGET_NAME} STATIC $<TARGET_OBJECTS:${OBJECT_TARGET}>) - if (${LINK_TYPE} STREQUAL "PRIVATE") - target_link_libraries(${TARGET_NAME} PRIVATE ${OBJECT_TARGET}) - elseif (${LINK_TYPE} STREQUAL "PUBLIC") - target_link_libraries(${TARGET_NAME} PUBLIC ${OBJECT_TARGET}) - elseif (${LINK_TYPE} STREQUAL "INTERFACE") - target_link_libraries(${TARGET_NAME} INTERFACE ${OBJECT_TARGET}) - elseif (${LINK_TYPE} STREQUAL "") - target_link_libraries(${TARGET_NAME} ${OBJECT_TARGET}) - else () - message(FATAL_ERROR "Incorrect link type: ${LINK_TYPE}") - endif () -endfunction() - -function(build_shared_target TARGET_NAME OBJECT_TARGET LINK_TYPE) - add_library(${TARGET_NAME} SHARED $<TARGET_OBJECTS:${OBJECT_TARGET}>) - if (${LINK_TYPE} STREQUAL "PRIVATE") - target_link_libraries(${TARGET_NAME} PRIVATE ${OBJECT_TARGET}) - elseif (${LINK_TYPE} STREQUAL "PUBLIC") - target_link_libraries(${TARGET_NAME} PUBLIC ${OBJECT_TARGET}) - elseif (${LINK_TYPE} STREQUAL "INTERFACE") - target_link_libraries(${TARGET_NAME} INTERFACE ${OBJECT_TARGET}) - elseif (${LINK_TYPE} STREQUAL "") - target_link_libraries(${TARGET_NAME} ${OBJECT_TARGET}) - else () - message(FATAL_ERROR "Incorrect link type: ${LINK_TYPE}") - endif () -endfunction() - -function(build_module_target TARGET_NAME OBJECT_TARGET LINK_TYPE) - add_library(${TARGET_NAME} MODULE $<TARGET_OBJECTS:${OBJECT_TARGET}>) - if (${LINK_TYPE} STREQUAL "PRIVATE") - target_link_libraries(${TARGET_NAME} PRIVATE ${OBJECT_TARGET}) - elseif (${LINK_TYPE} STREQUAL "PUBLIC") - target_link_libraries(${TARGET_NAME} PUBLIC ${OBJECT_TARGET}) - elseif (${LINK_TYPE} STREQUAL "INTERFACE") - target_link_libraries(${TARGET_NAME} INTERFACE ${OBJECT_TARGET}) - elseif (${LINK_TYPE} STREQUAL "") - target_link_libraries(${TARGET_NAME} ${OBJECT_TARGET}) - else () - message(FATAL_ERROR "Incorrect link type: ${LINK_TYPE}") - endif () -endfunction() - - -function(export_target TARGET_NAME) - target_link_libraries(MMDeployLibs INTERFACE ${TARGET_NAME}) - install(TARGETS ${TARGET_NAME} - EXPORT MMDeployTargets - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib - ) -endfunction() - -function(export_module TARGET_NAME) - get_target_property(TARGET_TYPE ${TARGET_NAME} TYPE) - if (${TARGET_TYPE} STREQUAL "STATIC_LIBRARY") - target_link_libraries(MMDeployStaticModules INTERFACE ${TARGET_NAME}) - elseif (${TARGET_TYPE} STREQUAL "SHARED_LIBRARY") - target_link_libraries(MMDeployDynamicModules INTERFACE ${TARGET_NAME}) - endif () - install(TARGETS ${TARGET_NAME} - EXPORT MMDeployTargets - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib - ) -endfunction() - -function(get_target_list INPUT_TARGETS OUTPUT_TARGETS) - set(FILTERED_TARGETS) - foreach (INPUT_TARGET IN LISTS INPUT_TARGETS) - if (TARGET ${INPUT_TARGET}) - list(APPEND FILTERED_TARGETS ${INPUT_TARGET}) - endif() - endforeach () - set(${OUTPUT_TARGETS} "${FILTERED_TARGETS}" PARENT_SCOPE) -endfunction()
diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 158e542e16..9fe42596c4 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -23,35 +23,41 @@ else () set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER}) set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler=-fPIC,-Wall,-fvisibility=hidden") - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler=-fno-gnu-unique") + if (CMAKE_CXX_COMPILER_ID MATCHES "GNU") + set(CUDA_NVCC_FLAGS
"${CUDA_NVCC_FLAGS} -Xcompiler=-fno-gnu-unique") + endif () endif () enable_language(CUDA) # set virtual compute architecture and real ones set(_NVCC_FLAGS) -set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_52,code=sm_52") -if (CUDA_VERSION_MAJOR VERSION_GREATER_EQUAL "8") - set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_60,code=sm_60") - set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_61,code=sm_61") -endif () -if (CUDA_VERSION_MAJOR VERSION_GREATER_EQUAL "9") - set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_70,code=sm_70") -endif () -if (CUDA_VERSION_MAJOR VERSION_GREATER_EQUAL "10") - set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_72,code=sm_72") - set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_75,code=sm_75") -endif () -if (CUDA_VERSION_MAJOR VERSION_GREATER_EQUAL "11") - set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_80,code=sm_80") - if (CUDA_VERSION_MINOR VERSION_GREATER_EQUAL "1") - # cuda doesn't support `sm_86` until version 11.1 - set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_86,code=sm_86") +if (NOT CMAKE_CUDA_ARCHITECTURES) + set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_52,code=sm_52") + if (CUDA_VERSION_MAJOR VERSION_GREATER_EQUAL "8") + set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_60,code=sm_60") + set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_61,code=sm_61") + endif () + if (CUDA_VERSION_MAJOR VERSION_GREATER_EQUAL "9") + set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_70,code=sm_70") + endif () + if (CUDA_VERSION_MAJOR VERSION_GREATER_EQUAL "10") + set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_72,code=sm_72") + set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_75,code=sm_75") + endif () + if (CUDA_VERSION_MAJOR VERSION_GREATER_EQUAL "11") + set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_80,code=sm_80") + if (CUDA_VERSION_MINOR VERSION_GREATER_EQUAL "1") + # cuda doesn't support `sm_86` until version 11.1 + set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_86,code=sm_86") + endif () endif () endif () set(CUDA_NVCC_FLAGS_DEBUG "-g -O0") set(CUDA_NVCC_FLAGS_RELEASE "-O3") set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}") -set(CMAKE_CUDA_STANDARD 14) +if (NOT MSVC) + set(CMAKE_CUDA_STANDARD 14) +endif () set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${CUDA_NVCC_FLAGS} ${_NVCC_FLAGS}") diff --git a/cmake/loader.cpp.in b/cmake/loader.cpp.in new file mode 100644 index 0000000000..6627d6e2e7 --- /dev/null +++ b/cmake/loader.cpp.in @@ -0,0 +1,39 @@ +// Copyright (c) OpenMMLab. All rights reserved. 
+ +#include <windows.h> + +#include <cstdio> + +namespace mmdeploy { +namespace { + +void* mmdeploy_load_library(const char* name) { + fprintf(stderr, "loading %s ...\n", name); + auto handle = LoadLibraryA(name); + if (!handle) { + fprintf(stderr, "failed to load library %s\n", name); + return nullptr; + } + return handle; +} + +// clang-format off + +class Loader { + public: + Loader() { + const char* modules[] = { + @_MMDEPLOY_DYNAMIC_MODULES@ + }; + for (const auto name : modules) { + mmdeploy_load_library(name); + } + } +}; + +// clang-format on + +static Loader loader; + +} // namespace +} // namespace mmdeploy
diff --git a/configs/_base_/backends/torchscript.py b/configs/_base_/backends/torchscript.py new file mode 100644 index 0000000000..754fe488ac --- /dev/null +++ b/configs/_base_/backends/torchscript.py @@ -0,0 +1 @@ +backend_config = dict(type='torchscript')
diff --git a/configs/_base_/torchscript_config.py b/configs/_base_/torchscript_config.py new file mode 100644 index 0000000000..b16a2e871d --- /dev/null +++ b/configs/_base_/torchscript_config.py @@ -0,0 +1,6 @@ +ir_config = dict( + type='torchscript', + save_file='end2end.pt', + input_names=['input'], + output_names=['output'], + input_shape=None)
diff --git a/configs/mmcls/classification_torchscript.py b/configs/mmcls/classification_torchscript.py new file mode 100644 index 0000000000..559fd25c38 --- /dev/null +++ b/configs/mmcls/classification_torchscript.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/torchscript_config.py', '../_base_/backends/torchscript.py' +] + +ir_config = dict(input_shape=None) +codebase_config = dict(type='mmcls', task='Classification')
diff --git a/configs/mmdet/_base_/base_instance-seg_torchscript.py b/configs/mmdet/_base_/base_instance-seg_torchscript.py new file mode 100644 index 0000000000..68eee07e72 --- /dev/null +++ b/configs/mmdet/_base_/base_instance-seg_torchscript.py @@ -0,0 +1,4 @@ +_base_ = ['./base_torchscript.py'] + +ir_config = dict(output_names=['dets', 'labels', 'masks']) +codebase_config = dict(post_processing=dict(export_postprocess_mask=False))
diff --git a/configs/mmdet/_base_/base_openvino_dynamic-300x300.py b/configs/mmdet/_base_/base_openvino_dynamic-300x300.py new file mode 100644 index 0000000000..ae1116df91 --- /dev/null +++ b/configs/mmdet/_base_/base_openvino_dynamic-300x300.py @@ -0,0 +1,6 @@ +_base_ = ['./base_dynamic.py', '../../_base_/backends/openvino.py'] + +onnx_config = dict(input_shape=None) + +backend_config = dict( + model_inputs=[dict(opt_shapes=dict(input=[1, 3, 300, 300]))])
diff --git a/configs/mmdet/_base_/base_torchscript.py b/configs/mmdet/_base_/base_torchscript.py new file mode 100644 index 0000000000..7e0ecc8ae5 --- /dev/null +++ b/configs/mmdet/_base_/base_torchscript.py @@ -0,0 +1,16 @@ +_base_ = ['../../_base_/torchscript_config.py'] + +ir_config = dict(output_names=['dets', 'labels']) +codebase_config = dict( + type='mmdet', + task='ObjectDetection', + model_type='end2end', + post_processing=dict( + score_threshold=0.05, + confidence_threshold=0.005, # for YOLOv3 + iou_threshold=0.5, + max_output_boxes_per_class=200, + pre_top_k=5000, + keep_top_k=100, + background_label_id=-1, + ))
diff --git a/configs/mmdet/detection/detection_openvino_dynamic-300x300.py b/configs/mmdet/detection/detection_openvino_dynamic-300x300.py new file mode 100644 index 0000000000..1df7d12114 --- /dev/null +++ b/configs/mmdet/detection/detection_openvino_dynamic-300x300.py @@ -0,0 +1 @@ +_base_ = ['../_base_/base_openvino_dynamic.py'] diff --git
a/configs/mmdet/detection/detection_torchscript.py b/configs/mmdet/detection/detection_torchscript.py new file mode 100644 index 0000000000..69bfbd9c7f --- /dev/null +++ b/configs/mmdet/detection/detection_torchscript.py @@ -0,0 +1,3 @@ +_base_ = [ + '../_base_/base_torchscript.py', '../../_base_/backends/torchscript.py' +] diff --git a/configs/mmdet/instance-seg/instance-seg_torchscript.py b/configs/mmdet/instance-seg/instance-seg_torchscript.py new file mode 100644 index 0000000000..ba8ad7e041 --- /dev/null +++ b/configs/mmdet/instance-seg/instance-seg_torchscript.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/base_instance-seg_torchscript.py', + '../../_base_/backends/torchscript.py' +] diff --git a/configs/mmedit/super-resolution/super-resolution_torchscript.py b/configs/mmedit/super-resolution/super-resolution_torchscript.py new file mode 100644 index 0000000000..8ebef20e34 --- /dev/null +++ b/configs/mmedit/super-resolution/super-resolution_torchscript.py @@ -0,0 +1,7 @@ +_base_ = [ + '../../_base_/torchscript_config.py', + '../../_base_/backends/torchscript.py' +] + +ir_config = dict(input_shape=None) +codebase_config = dict(type='mmedit', task='SuperResolution') diff --git a/configs/mmocr/text-detection/text-detection_torchscript.py b/configs/mmocr/text-detection/text-detection_torchscript.py new file mode 100644 index 0000000000..48a27d44eb --- /dev/null +++ b/configs/mmocr/text-detection/text-detection_torchscript.py @@ -0,0 +1,7 @@ +_base_ = [ + '../../_base_/torchscript_config.py', + '../../_base_/backends/torchscript.py' +] + +ir_config = dict(input_shape=None) +codebase_config = dict(type='mmocr', task='TextDetection') diff --git a/configs/mmocr/text-recognition/text-recognition_torchscript.py b/configs/mmocr/text-recognition/text-recognition_torchscript.py new file mode 100644 index 0000000000..14e9112e49 --- /dev/null +++ b/configs/mmocr/text-recognition/text-recognition_torchscript.py @@ -0,0 +1,7 @@ +_base_ = [ + '../../_base_/torchscript_config.py', + '../../_base_/backends/torchscript.py' +] + +ir_config = dict(input_shape=None) +codebase_config = dict(type='mmocr', task='TextRecognition') diff --git a/configs/mmseg/segmentation_torchscript.py b/configs/mmseg/segmentation_torchscript.py new file mode 100644 index 0000000000..665f308ecc --- /dev/null +++ b/configs/mmseg/segmentation_torchscript.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/torchscript_config.py', '../_base_/backends/torchscript.py' +] + +ir_config = dict(input_shape=None) +codebase_config = dict(type='mmseg', task='Segmentation') diff --git a/csrc/CMakeLists.txt b/csrc/CMakeLists.txt index 889e54bb5d..b14c81c136 100644 --- a/csrc/CMakeLists.txt +++ b/csrc/CMakeLists.txt @@ -4,6 +4,7 @@ add_subdirectory(backend_ops) if (MMDEPLOY_BUILD_SDK) add_subdirectory(core) + add_subdirectory(utils) add_subdirectory(archive) add_subdirectory(device) add_subdirectory(graph) diff --git a/csrc/apis/c/CMakeLists.txt b/csrc/apis/c/CMakeLists.txt index 81da0a3fb1..f1809995bb 100644 --- a/csrc/apis/c/CMakeLists.txt +++ b/csrc/apis/c/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.14) project(capis) -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) if ("all" IN_LIST MMDEPLOY_CODEBASES) set(TASK_LIST "classifier;detector;segmentor;text_detector;text_recognizer;restorer;model") @@ -28,16 +28,13 @@ endif () foreach (TASK ${TASK_LIST}) set(TARGET_NAME mmdeploy_${TASK}) - build_target(${TARGET_NAME} ${TASK}.cpp) + mmdeploy_add_library(${TARGET_NAME} ${TASK}.cpp) 
target_link_libraries(${TARGET_NAME} PRIVATE mmdeploy::core) target_include_directories(${TARGET_NAME} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}> $<INSTALL_INTERFACE:include/c>) - export_target(${TARGET_NAME}) - install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${TASK}.h DESTINATION include/c) - endforeach () install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/common.h
diff --git a/csrc/apis/c/classifier.cpp b/csrc/apis/c/classifier.cpp index 9236f5eae0..ecdfaafc87 100644 --- a/csrc/apis/c/classifier.cpp +++ b/csrc/apis/c/classifier.cpp @@ -55,28 +55,28 @@ int mmdeploy_classifier_create_impl(ModelType&& m, const char* device_name, int return MM_SUCCESS; } catch (const std::exception& e) { - ERROR("exception caught: {}", e.what()); + MMDEPLOY_ERROR("exception caught: {}", e.what()); } catch (...) { - ERROR("unknown exception caught"); + MMDEPLOY_ERROR("unknown exception caught"); } return MM_E_FAIL; } } // namespace -MM_SDK_API int mmdeploy_classifier_create(mm_model_t model, const char* device_name, int device_id, - mm_handle_t* handle) { +int mmdeploy_classifier_create(mm_model_t model, const char* device_name, int device_id, + mm_handle_t* handle) { return mmdeploy_classifier_create_impl(*static_cast<Model*>(model), device_name, device_id, handle); } -MM_SDK_API int mmdeploy_classifier_create_by_path(const char* model_path, const char* device_name, - int device_id, mm_handle_t* handle) { +int mmdeploy_classifier_create_by_path(const char* model_path, const char* device_name, + int device_id, mm_handle_t* handle) { return mmdeploy_classifier_create_impl(model_path, device_name, device_id, handle); } -MM_SDK_API int mmdeploy_classifier_apply(mm_handle_t handle, const mm_mat_t* mats, int mat_count, - mm_class_t** results, int** result_count) { +int mmdeploy_classifier_apply(mm_handle_t handle, const mm_mat_t* mats, int mat_count, + mm_class_t** results, int** result_count) { if (handle == nullptr || mats == nullptr || mat_count == 0) { return MM_E_INVALID_ARG; } @@ -92,7 +92,7 @@ MM_SDK_API int mmdeploy_classifier_apply(mm_handle_t handle, const mm_mat_t* mat } auto output = classifier->Run(std::move(input)).value().front(); - DEBUG("output: {}", output); + MMDEPLOY_DEBUG("output: {}", output); auto classify_outputs = from_value>(output); @@ -124,20 +124,19 @@ MM_SDK_API int mmdeploy_classifier_apply(mm_handle_t handle, const mm_mat_t* mat return MM_SUCCESS; } catch (const std::exception& e) { - ERROR("exception caught: {}", e.what()); + MMDEPLOY_ERROR("exception caught: {}", e.what()); } catch (...)
{ - ERROR("unknown exception caught"); + MMDEPLOY_ERROR("unknown exception caught"); } return MM_E_FAIL; } -MM_SDK_API void mmdeploy_classifier_release_result(mm_class_t* results, const int* result_count, - int count) { +void mmdeploy_classifier_release_result(mm_class_t* results, const int* result_count, int count) { delete[] results; delete[] result_count; } -MM_SDK_API void mmdeploy_classifier_destroy(mm_handle_t handle) { +void mmdeploy_classifier_destroy(mm_handle_t handle) { if (handle != nullptr) { auto classifier = static_cast(handle); delete classifier; diff --git a/csrc/apis/c/classifier.h b/csrc/apis/c/classifier.h index 6834b8e401..a2209792ba 100644 --- a/csrc/apis/c/classifier.h +++ b/csrc/apis/c/classifier.h @@ -10,6 +10,10 @@ #include "common.h" +#ifdef __cplusplus +extern "C" { +#endif + typedef struct mm_class_t { int label_id; float score; @@ -25,8 +29,8 @@ typedef struct mm_class_t { * by \ref mmdeploy_classifier_destroy * @return status of creating classifier's handle */ -MM_SDK_API int mmdeploy_classifier_create(mm_model_t model, const char* device_name, int device_id, - mm_handle_t* handle); +MMDEPLOY_API int mmdeploy_classifier_create(mm_model_t model, const char* device_name, + int device_id, mm_handle_t* handle); /** * @brief Create classifier's handle @@ -37,8 +41,8 @@ MM_SDK_API int mmdeploy_classifier_create(mm_model_t model, const char* device_n * by \ref mmdeploy_classifier_destroy * @return status of creating classifier's handle */ -MM_SDK_API int mmdeploy_classifier_create_by_path(const char* model_path, const char* device_name, - int device_id, mm_handle_t* handle); +MMDEPLOY_API int mmdeploy_classifier_create_by_path(const char* model_path, const char* device_name, + int device_id, mm_handle_t* handle); /** * @brief Use classifier created by \ref mmdeploy_classifier_create_by_path to get label @@ -53,8 +57,8 @@ MM_SDK_API int mmdeploy_classifier_create_by_path(const char* model_path, const * mmdeploy_classifier_release_result * @return status of inference */ -MM_SDK_API int mmdeploy_classifier_apply(mm_handle_t handle, const mm_mat_t* mats, int mat_count, - mm_class_t** results, int** result_count); +MMDEPLOY_API int mmdeploy_classifier_apply(mm_handle_t handle, const mm_mat_t* mats, int mat_count, + mm_class_t** results, int** result_count); /** * @brief Release the inference result buffer created \ref mmdeploy_classifier_apply @@ -62,13 +66,17 @@ MM_SDK_API int mmdeploy_classifier_apply(mm_handle_t handle, const mm_mat_t* mat * @param[in] result_count \p results size buffer * @param[in] count length of \p result_count */ -MM_SDK_API void mmdeploy_classifier_release_result(mm_class_t* results, const int* result_count, - int count); +MMDEPLOY_API void mmdeploy_classifier_release_result(mm_class_t* results, const int* result_count, + int count); /** * @brief Destroy classifier's handle * @param[in] handle classifier's handle created by \ref mmdeploy_classifier_create_by_path */ -MM_SDK_API void mmdeploy_classifier_destroy(mm_handle_t handle); +MMDEPLOY_API void mmdeploy_classifier_destroy(mm_handle_t handle); + +#ifdef __cplusplus +} +#endif #endif // MMDEPLOY_CLASSIFIER_H diff --git a/csrc/apis/c/common.h b/csrc/apis/c/common.h index 1809f77727..dc82d44292 100644 --- a/csrc/apis/c/common.h +++ b/csrc/apis/c/common.h @@ -3,9 +3,23 @@ #ifndef MMDEPLOY_COMMON_H #define MMDEPLOY_COMMON_H -#include +#include -#define MM_SDK_API +#ifndef MMDEPLOY_EXPORT +#ifdef _MSC_VER +#define MMDEPLOY_EXPORT __declspec(dllexport) +#else +#define MMDEPLOY_EXPORT 
__attribute__((visibility("default"))) +#endif +#endif + +#ifndef MMDEPLOY_API +#ifdef MMDEPLOY_API_EXPORTS +#define MMDEPLOY_API MMDEPLOY_EXPORT +#else +#define MMDEPLOY_API +#endif +#endif // clang-format off
diff --git a/csrc/apis/c/detector.cpp b/csrc/apis/c/detector.cpp index 190b8bf7d5..4dbb573f96 100644 --- a/csrc/apis/c/detector.cpp +++ b/csrc/apis/c/detector.cpp @@ -55,27 +55,27 @@ int mmdeploy_detector_create_impl(ModelType&& m, const char* device_name, int de return MM_SUCCESS; } catch (const std::exception& e) { - ERROR("exception caught: {}", e.what()); + MMDEPLOY_ERROR("exception caught: {}", e.what()); } catch (...) { - ERROR("unknown exception caught"); + MMDEPLOY_ERROR("unknown exception caught"); } return MM_E_FAIL; } } // namespace -MM_SDK_API int mmdeploy_detector_create(mm_model_t model, const char* device_name, int device_id, - mm_handle_t* handle) { +int mmdeploy_detector_create(mm_model_t model, const char* device_name, int device_id, + mm_handle_t* handle) { return mmdeploy_detector_create_impl(*static_cast<Model*>(model), device_name, device_id, handle); } -MM_SDK_API int mmdeploy_detector_create_by_path(const char* model_path, const char* device_name, - int device_id, mm_handle_t* handle) { +int mmdeploy_detector_create_by_path(const char* model_path, const char* device_name, int device_id, + mm_handle_t* handle) { return mmdeploy_detector_create_impl(model_path, device_name, device_id, handle); } -MM_SDK_API int mmdeploy_detector_apply(mm_handle_t handle, const mm_mat_t* mats, int mat_count, - mm_detect_t** results, int** result_count) { +int mmdeploy_detector_apply(mm_handle_t handle, const mm_mat_t* mats, int mat_count, + mm_detect_t** results, int** result_count) { if (handle == nullptr || mats == nullptr || mat_count == 0) { return MM_E_INVALID_ARG; } @@ -91,7 +91,7 @@ MM_SDK_API int mmdeploy_detector_apply(mm_handle_t handle, const mm_mat_t* mats, } auto output = detector->Run(std::move(input)).value().front(); - DEBUG("output: {}", output); + MMDEPLOY_DEBUG("output: {}", output); auto detector_outputs = from_value>(output); @@ -142,15 +142,14 @@ MM_SDK_API int mmdeploy_detector_apply(mm_handle_t handle, const mm_mat_t* mats, return MM_SUCCESS; } catch (const std::exception& e) { - ERROR("exception caught: {}", e.what()); + MMDEPLOY_ERROR("exception caught: {}", e.what()); } catch (...)
{ - ERROR("unknown exception caught"); + MMDEPLOY_ERROR("unknown exception caught"); } return MM_E_FAIL; } -MM_SDK_API void mmdeploy_detector_release_result(mm_detect_t* results, const int* result_count, - int count) { +void mmdeploy_detector_release_result(mm_detect_t* results, const int* result_count, int count) { auto result_ptr = results; for (int i = 0; i < count; ++i) { for (int j = 0; j < result_count[i]; ++j, ++result_ptr) { @@ -164,7 +163,7 @@ MM_SDK_API void mmdeploy_detector_release_result(mm_detect_t* results, const int delete[] result_count; } -MM_SDK_API void mmdeploy_detector_destroy(mm_handle_t handle) { +void mmdeploy_detector_destroy(mm_handle_t handle) { if (handle != nullptr) { auto detector = static_cast(handle); delete detector; diff --git a/csrc/apis/c/detector.h b/csrc/apis/c/detector.h index 59689dd0de..bfcf0a8acb 100644 --- a/csrc/apis/c/detector.h +++ b/csrc/apis/c/detector.h @@ -10,6 +10,10 @@ #include "common.h" +#ifdef __cplusplus +extern "C" { +#endif + typedef struct mm_instance_mask_t { char* data; int height; @@ -32,8 +36,8 @@ typedef struct mm_detect_t { * @param[out] handle instance of a detector * @return status of creating detector's handle */ -MM_SDK_API int mmdeploy_detector_create(mm_model_t model, const char* device_name, int device_id, - mm_handle_t* handle); +MMDEPLOY_API int mmdeploy_detector_create(mm_model_t model, const char* device_name, int device_id, + mm_handle_t* handle); /** * @brief Create detector's handle @@ -43,8 +47,8 @@ MM_SDK_API int mmdeploy_detector_create(mm_model_t model, const char* device_nam * @param[out] handle instance of a detector * @return status of creating detector's handle */ -MM_SDK_API int mmdeploy_detector_create_by_path(const char* model_path, const char* device_name, - int device_id, mm_handle_t* handle); +MMDEPLOY_API int mmdeploy_detector_create_by_path(const char* model_path, const char* device_name, + int device_id, mm_handle_t* handle); /** * @brief Apply detector to batch images and get their inference results @@ -58,21 +62,25 @@ MM_SDK_API int mmdeploy_detector_create_by_path(const char* model_path, const ch * mmdeploy_detector_release_result * @return status of inference */ -MM_SDK_API int mmdeploy_detector_apply(mm_handle_t handle, const mm_mat_t* mats, int mat_count, - mm_detect_t** results, int** result_count); +MMDEPLOY_API int mmdeploy_detector_apply(mm_handle_t handle, const mm_mat_t* mats, int mat_count, + mm_detect_t** results, int** result_count); /** @brief Release the inference result buffer created by \ref mmdeploy_detector_apply * @param[in] results detection results buffer * @param[in] result_count \p results size buffer * @param[in] count length of \p result_count */ -MM_SDK_API void mmdeploy_detector_release_result(mm_detect_t* results, const int* result_count, - int count); +MMDEPLOY_API void mmdeploy_detector_release_result(mm_detect_t* results, const int* result_count, + int count); /** * @brief Destroy detector's handle * @param[in] handle detector's handle created by \ref mmdeploy_detector_create_by_path */ -MM_SDK_API void mmdeploy_detector_destroy(mm_handle_t handle); +MMDEPLOY_API void mmdeploy_detector_destroy(mm_handle_t handle); + +#ifdef __cplusplus +} +#endif #endif // MMDEPLOY_DETECTOR_H diff --git a/csrc/apis/c/handle.h b/csrc/apis/c/handle.h index 2af9c0da58..4223452995 100644 --- a/csrc/apis/c/handle.h +++ b/csrc/apis/c/handle.h @@ -20,12 +20,12 @@ class Handle { config["context"].update({{"device", device_}, {"stream", stream_}}); auto creator = 
Registry<graph::Node>::Get().GetCreator("Pipeline"); if (!creator) { - ERROR("failed to find Pipeline creator"); + MMDEPLOY_ERROR("failed to find Pipeline creator"); throw_exception(eEntryNotFound); } pipeline_ = creator->Create(config); if (!pipeline_) { - ERROR("create pipeline failed"); + MMDEPLOY_ERROR("create pipeline failed"); throw_exception(eFail); } pipeline_->Build(graph_);
diff --git a/csrc/apis/c/model.cpp b/csrc/apis/c/model.cpp index 9834071c70..5101b92a47 100644 --- a/csrc/apis/c/model.cpp +++ b/csrc/apis/c/model.cpp @@ -1,11 +1,13 @@ // Copyright (c) OpenMMLab. All rights reserved. -#include "core/model.h" +// clang-format off +#include "model.h" #include <memory> #include "core/logger.h" -#include "model.h" +#include "core/model.h" +// clang-format on using namespace mmdeploy; @@ -15,9 +17,9 @@ int mmdeploy_model_create_by_path(const char *path, mm_model_t *model) { *model = ptr.release(); return MM_SUCCESS; } catch (const std::exception &e) { - ERROR("failed to create model: {}", e.what()); + MMDEPLOY_ERROR("failed to create model: {}", e.what()); } catch (...) { - ERROR("unknown exception caught"); + MMDEPLOY_ERROR("unknown exception caught"); } return MM_E_FAIL; } @@ -28,9 +30,9 @@ int mmdeploy_model_create(const void *buffer, int size, mm_model_t *model) { *model = ptr.release(); return MM_SUCCESS; } catch (const std::exception &e) { - ERROR("failed to create model: {}", e.what()); + MMDEPLOY_ERROR("failed to create model: {}", e.what()); } catch (...) { - ERROR("unknown exception caught"); + MMDEPLOY_ERROR("unknown exception caught"); } return MM_E_FAIL; }
diff --git a/csrc/apis/c/model.h b/csrc/apis/c/model.h index 731bb0270b..6151ba43a5 100644 --- a/csrc/apis/c/model.h +++ b/csrc/apis/c/model.h @@ -10,13 +10,17 @@ #include "common.h" +#ifdef __cplusplus +extern "C" { +#endif + /** * @brief Create SDK Model instance from given model path * @param[in] path model path * @param[out] model sdk model instance that must be destroyed by \ref mmdeploy_model_destroy * @return status code of the operation */ -MM_SDK_API int mmdeploy_model_create_by_path(const char* path, mm_model_t* model); +MMDEPLOY_API int mmdeploy_model_create_by_path(const char* path, mm_model_t* model); /** * @brief Create SDK Model instance from memory @@ -25,13 +29,17 @@ MM_SDK_API int mmdeploy_model_create_by_path(const char* path, mm_model_t* model * @param[out] model sdk model instance that must be destroyed by \ref mmdeploy_model_destroy * @return status code of the operation */ -MM_SDK_API int mmdeploy_model_create(const void* buffer, int size, mm_model_t* model); +MMDEPLOY_API int mmdeploy_model_create(const void* buffer, int size, mm_model_t* model); /** * @brief Destroy model instance * @param[in] model sdk model instance created by \ref mmdeploy_model_create_by_path or \ref * mmdeploy_model_create */ -MM_SDK_API void mmdeploy_model_destroy(mm_model_t model); +MMDEPLOY_API void mmdeploy_model_destroy(mm_model_t model); + +#ifdef __cplusplus +} +#endif #endif // MMDEPLOY_SRC_APIS_C_MODEL_H_
diff --git a/csrc/apis/c/restorer.cpp b/csrc/apis/c/restorer.cpp index 0e12fa02c6..95e3679584 100644 --- a/csrc/apis/c/restorer.cpp +++ b/csrc/apis/c/restorer.cpp @@ -51,9 +51,9 @@ int mmdeploy_restorer_create_impl(ModelType &&m, const char *device_name, int de return MM_SUCCESS; } catch (const std::exception &e) { - ERROR("exception caught: {}", e.what()); + MMDEPLOY_ERROR("exception caught: {}", e.what()); } catch (...)
{ - ERROR("unknown exception caught"); + MMDEPLOY_ERROR("unknown exception caught"); } return MM_E_FAIL; } @@ -105,9 +105,9 @@ int mmdeploy_restorer_apply(mm_handle_t handle, const mm_mat_t *images, int coun *results = _results.release(); return MM_SUCCESS; } catch (const std::exception &e) { - ERROR("exception caught: {}", e.what()); + MMDEPLOY_ERROR("exception caught: {}", e.what()); } catch (...) { - ERROR("unknown exception caught"); + MMDEPLOY_ERROR("unknown exception caught"); } return MM_E_FAIL; } diff --git a/csrc/apis/c/restorer.h b/csrc/apis/c/restorer.h index 5d568cd418..4ae491a236 100644 --- a/csrc/apis/c/restorer.h +++ b/csrc/apis/c/restorer.h @@ -10,6 +10,10 @@ #include "common.h" +#ifdef __cplusplus +extern "C" { +#endif + /** * @brief Create a restorer instance * @param[in] model an instance of image restoration model created by @@ -20,8 +24,8 @@ * by \ref mmdeploy_restorer_destroy * @return status code of the operation */ -MM_SDK_API int mmdeploy_restorer_create(mm_model_t model, const char* device_name, int device_id, - mm_handle_t* handle); +MMDEPLOY_API int mmdeploy_restorer_create(mm_model_t model, const char* device_name, int device_id, + mm_handle_t* handle); /** * @brief Create a restorer instance @@ -32,8 +36,8 @@ MM_SDK_API int mmdeploy_restorer_create(mm_model_t model, const char* device_nam * by \ref mmdeploy_restorer_destroy * @return status code of the operation */ -MM_SDK_API int mmdeploy_restorer_create_by_path(const char* model_path, const char* device_name, - int device_id, mm_handle_t* handle); +MMDEPLOY_API int mmdeploy_restorer_create_by_path(const char* model_path, const char* device_name, + int device_id, mm_handle_t* handle); /** * @brief Apply restorer to a batch of images @@ -44,19 +48,23 @@ MM_SDK_API int mmdeploy_restorer_create_by_path(const char* model_path, const ch * by \ref mmdeploy_restorer_release_result * @return status code of the operation */ -MM_SDK_API int mmdeploy_restorer_apply(mm_handle_t handle, const mm_mat_t* images, int count, - mm_mat_t** results); +MMDEPLOY_API int mmdeploy_restorer_apply(mm_handle_t handle, const mm_mat_t* images, int count, + mm_mat_t** results); /** @brief Release result buffer returned by \ref mmdeploy_restorer_apply * @param[in] results result buffer by restorer * @param[in] count length of \p result */ -MM_SDK_API void mmdeploy_restorer_release_result(mm_mat_t* results, int count); +MMDEPLOY_API void mmdeploy_restorer_release_result(mm_mat_t* results, int count); /** * @brief destroy restorer * @param[in] handle handle of restorer created by \ref mmdeploy_restorer_create_by_path */ -MM_SDK_API void mmdeploy_restorer_destroy(mm_handle_t handle); +MMDEPLOY_API void mmdeploy_restorer_destroy(mm_handle_t handle); + +#ifdef __cplusplus +} +#endif #endif // MMDEPLOY_SRC_APIS_C_RESTORER_H_ diff --git a/csrc/apis/c/segmentor.cpp b/csrc/apis/c/segmentor.cpp index 2c578de321..bcdca722a7 100644 --- a/csrc/apis/c/segmentor.cpp +++ b/csrc/apis/c/segmentor.cpp @@ -53,28 +53,28 @@ int mmdeploy_segmentor_create_impl(ModelType&& m, const char* device_name, int d return MM_SUCCESS; } catch (const std::exception& e) { - ERROR("exception caught: {}", e.what()); + MMDEPLOY_ERROR("exception caught: {}", e.what()); } catch (...) 
{ - ERROR("unknown exception caught"); + MMDEPLOY_ERROR("unknown exception caught"); } return MM_E_FAIL; } } // namespace -MM_SDK_API int mmdeploy_segmentor_create(mm_model_t model, const char* device_name, int device_id, - mm_handle_t* handle) { +int mmdeploy_segmentor_create(mm_model_t model, const char* device_name, int device_id, + mm_handle_t* handle) { return mmdeploy_segmentor_create_impl(*static_cast(model), device_name, device_id, handle); } -MM_SDK_API int mmdeploy_segmentor_create_by_path(const char* model_path, const char* device_name, - int device_id, mm_handle_t* handle) { +int mmdeploy_segmentor_create_by_path(const char* model_path, const char* device_name, + int device_id, mm_handle_t* handle) { return mmdeploy_segmentor_create_impl(model_path, device_name, device_id, handle); } -MM_SDK_API int mmdeploy_segmentor_apply(mm_handle_t handle, const mm_mat_t* mats, int mat_count, - mm_segment_t** results) { +int mmdeploy_segmentor_apply(mm_handle_t handle, const mm_mat_t* mats, int mat_count, + mm_segment_t** results) { if (handle == nullptr || mats == nullptr || mat_count == 0 || results == nullptr) { return MM_E_INVALID_ARG; } @@ -97,7 +97,7 @@ MM_SDK_API int mmdeploy_segmentor_apply(mm_handle_t handle, const mm_mat_t* mats auto results_ptr = _results.get(); for (auto i = 0; i < mat_count; ++i, ++results_ptr) { auto& output_item = output[i]; - DEBUG("the {}-th item in output: {}", i, output_item); + MMDEPLOY_DEBUG("the {}-th item in output: {}", i, output_item); auto segmentor_output = from_value(output_item); results_ptr->height = segmentor_output.height; results_ptr->width = segmentor_output.width; @@ -110,14 +110,14 @@ MM_SDK_API int mmdeploy_segmentor_apply(mm_handle_t handle, const mm_mat_t* mats return MM_SUCCESS; } catch (const std::exception& e) { - ERROR("exception caught: {}", e.what()); + MMDEPLOY_ERROR("exception caught: {}", e.what()); } catch (...) 
{ - ERROR("unknown exception caught"); + MMDEPLOY_ERROR("unknown exception caught"); } return MM_E_FAIL; } -MM_SDK_API void mmdeploy_segmentor_release_result(mm_segment_t* results, int count) { +void mmdeploy_segmentor_release_result(mm_segment_t* results, int count) { if (results == nullptr) { return; } @@ -128,7 +128,7 @@ MM_SDK_API void mmdeploy_segmentor_release_result(mm_segment_t* results, int cou delete[] results; } -MM_SDK_API void mmdeploy_segmentor_destroy(mm_handle_t handle) { +void mmdeploy_segmentor_destroy(mm_handle_t handle) { if (handle != nullptr) { auto segmentor = static_cast(handle); delete segmentor; diff --git a/csrc/apis/c/segmentor.h b/csrc/apis/c/segmentor.h index 4abcd3cf68..741fbd9633 100644 --- a/csrc/apis/c/segmentor.h +++ b/csrc/apis/c/segmentor.h @@ -10,6 +10,10 @@ #include "common.h" +#ifdef __cplusplus +extern "C" { +#endif + typedef struct mm_segment_t { int height; ///< height of \p mask that equals to the input image's height int width; ///< width of \p mask that equals to the input image's width @@ -28,8 +32,8 @@ typedef struct mm_segment_t { * by \ref mmdeploy_segmentor_destroy * @return status of creating segmentor's handle */ -MM_SDK_API int mmdeploy_segmentor_create(mm_model_t model, const char* device_name, int device_id, - mm_handle_t* handle); +MMDEPLOY_API int mmdeploy_segmentor_create(mm_model_t model, const char* device_name, int device_id, + mm_handle_t* handle); /** * @brief Create segmentor's handle @@ -40,8 +44,8 @@ MM_SDK_API int mmdeploy_segmentor_create(mm_model_t model, const char* device_na * by \ref mmdeploy_segmentor_destroy * @return status of creating segmentor's handle */ -MM_SDK_API int mmdeploy_segmentor_create_by_path(const char* model_path, const char* device_name, - int device_id, mm_handle_t* handle); +MMDEPLOY_API int mmdeploy_segmentor_create_by_path(const char* model_path, const char* device_name, + int device_id, mm_handle_t* handle); /** * @brief Apply segmentor to batch images and get their inference results @@ -53,19 +57,23 @@ MM_SDK_API int mmdeploy_segmentor_create_by_path(const char* model_path, const c * image. 
It must be released by \ref mmdeploy_segmentor_release_result * @return status of inference */ -MM_SDK_API int mmdeploy_segmentor_apply(mm_handle_t handle, const mm_mat_t* mats, int mat_count, - mm_segment_t** results); +MMDEPLOY_API int mmdeploy_segmentor_apply(mm_handle_t handle, const mm_mat_t* mats, int mat_count, + mm_segment_t** results); /** @brief Release result buffer returned by \ref mmdeploy_segmentor_apply * @param[in] results result buffer * @param[in] count length of \p results */ -MM_SDK_API void mmdeploy_segmentor_release_result(mm_segment_t* results, int count); +MMDEPLOY_API void mmdeploy_segmentor_release_result(mm_segment_t* results, int count); /** * @brief Destroy segmentor's handle * @param[in] handle segmentor's handle created by \ref mmdeploy_segmentor_create_by_path */ -MM_SDK_API void mmdeploy_segmentor_destroy(mm_handle_t handle); +MMDEPLOY_API void mmdeploy_segmentor_destroy(mm_handle_t handle); + +#ifdef __cplusplus +} +#endif #endif // MMDEPLOY_SEGMENTOR_H
diff --git a/csrc/apis/c/text_detector.cpp b/csrc/apis/c/text_detector.cpp index 1f0479f30d..6369044bfd 100644 --- a/csrc/apis/c/text_detector.cpp +++ b/csrc/apis/c/text_detector.cpp @@ -53,29 +53,28 @@ int mmdeploy_text_detector_create_impl(ModelType&& m, const char* device_name, i return MM_SUCCESS; } catch (const std::exception& e) { - ERROR("exception caught: {}", e.what()); + MMDEPLOY_ERROR("exception caught: {}", e.what()); } catch (...) { - ERROR("unknown exception caught"); + MMDEPLOY_ERROR("unknown exception caught"); } return MM_E_FAIL; } } // namespace -MM_SDK_API int mmdeploy_text_detector_create(mm_model_t model, const char* device_name, - int device_id, mm_handle_t* handle) { +int mmdeploy_text_detector_create(mm_model_t model, const char* device_name, int device_id, + mm_handle_t* handle) { return mmdeploy_text_detector_create_impl(*static_cast<Model*>(model), device_name, device_id, handle); } -MM_SDK_API int mmdeploy_text_detector_create_by_path(const char* model_path, - const char* device_name, int device_id, - mm_handle_t* handle) { +int mmdeploy_text_detector_create_by_path(const char* model_path, const char* device_name, + int device_id, mm_handle_t* handle) { return mmdeploy_text_detector_create_impl(model_path, device_name, device_id, handle); } -MM_SDK_API int mmdeploy_text_detector_apply(mm_handle_t handle, const mm_mat_t* mats, int mat_count, - mm_text_detect_t** results, int** result_count) { +int mmdeploy_text_detector_apply(mm_handle_t handle, const mm_mat_t* mats, int mat_count, + mm_text_detect_t** results, int** result_count) { if (handle == nullptr || mats == nullptr || mat_count == 0) { return MM_E_INVALID_ARG; } @@ -91,7 +90,7 @@ MM_SDK_API int mmdeploy_text_detector_apply(mm_handle_t handle, const mm_mat_t* } auto output = text_detector->Run(std::move(input)).value().front(); - DEBUG("output: {}", output); + MMDEPLOY_DEBUG("output: {}", output); auto detector_outputs = from_value>(output); vector<int> _result_count; @@ -125,20 +124,20 @@ MM_SDK_API int mmdeploy_text_detector_apply(mm_handle_t handle, const mm_mat_t* return MM_SUCCESS; } catch (const std::exception& e) { - ERROR("exception caught: {}", e.what()); + MMDEPLOY_ERROR("exception caught: {}", e.what()); } catch (...)
{ - ERROR("unknown exception caught"); + MMDEPLOY_ERROR("unknown exception caught"); } return MM_E_FAIL; } -MM_SDK_API void mmdeploy_text_detector_release_result(mm_text_detect_t* results, - const int* result_count, int count) { +void mmdeploy_text_detector_release_result(mm_text_detect_t* results, const int* result_count, + int count) { delete[] results; delete[] result_count; } -MM_SDK_API void mmdeploy_text_detector_destroy(mm_handle_t handle) { +void mmdeploy_text_detector_destroy(mm_handle_t handle) { if (handle != nullptr) { auto text_detector = static_cast(handle); delete text_detector; diff --git a/csrc/apis/c/text_detector.h b/csrc/apis/c/text_detector.h index 06cb78558c..0ca39b9003 100644 --- a/csrc/apis/c/text_detector.h +++ b/csrc/apis/c/text_detector.h @@ -10,6 +10,10 @@ #include "common.h" +#ifdef __cplusplus +extern "C" { +#endif + typedef struct mm_text_detect_t { mm_pointf_t bbox[4]; ///< a text bounding box of which the vertex are in clock-wise float score; @@ -25,8 +29,8 @@ typedef struct mm_text_detect_t { * by \ref mmdeploy_text_detector_destroy * @return status of creating text-detector's handle */ -MM_SDK_API int mmdeploy_text_detector_create(mm_model_t model, const char* device_name, - int device_id, mm_handle_t* handle); +MMDEPLOY_API int mmdeploy_text_detector_create(mm_model_t model, const char* device_name, + int device_id, mm_handle_t* handle); /** * @brief Create text-detector's handle @@ -37,9 +41,9 @@ MM_SDK_API int mmdeploy_text_detector_create(mm_model_t model, const char* devic * by \ref mmdeploy_text_detector_destroy * @return status of creating text-detector's handle */ -MM_SDK_API int mmdeploy_text_detector_create_by_path(const char* model_path, - const char* device_name, int device_id, - mm_handle_t* handle); +MMDEPLOY_API int mmdeploy_text_detector_create_by_path(const char* model_path, + const char* device_name, int device_id, + mm_handle_t* handle); /** * @brief Apply text-detector to batch images and get their inference results @@ -52,22 +56,27 @@ MM_SDK_API int mmdeploy_text_detector_create_by_path(const char* model_path, * results of each image. 
It must be released by \ref mmdeploy_detector_release_result * @return status of inference */ -MM_SDK_API int mmdeploy_text_detector_apply(mm_handle_t handle, const mm_mat_t* mats, int mat_count, - mm_text_detect_t** results, int** result_count); +MMDEPLOY_API int mmdeploy_text_detector_apply(mm_handle_t handle, const mm_mat_t* mats, + int mat_count, mm_text_detect_t** results, + int** result_count); /** @brief Release the inference result buffer returned by \ref mmdeploy_text_detector_apply * @param[in] results text detection result buffer * @param[in] result_count \p results size buffer * @param[in] count the length of buffer \p result_count */ -MM_SDK_API void mmdeploy_text_detector_release_result(mm_text_detect_t* results, - const int* result_count, int count); +MMDEPLOY_API void mmdeploy_text_detector_release_result(mm_text_detect_t* results, + const int* result_count, int count); /** * @brief Destroy text-detector's handle * @param[in] handle text-detector's handle created by \ref mmdeploy_text_detector_create_by_path or * \ref mmdeploy_text_detector_create */ -MM_SDK_API void mmdeploy_text_detector_destroy(mm_handle_t handle); +MMDEPLOY_API void mmdeploy_text_detector_destroy(mm_handle_t handle); + +#ifdef __cplusplus +} +#endif #endif // MMDEPLOY_TEXT_DETECTOR_H diff --git a/csrc/apis/c/text_recognizer.cpp b/csrc/apis/c/text_recognizer.cpp index 1ecf29e220..9458712b54 100644 --- a/csrc/apis/c/text_recognizer.cpp +++ b/csrc/apis/c/text_recognizer.cpp @@ -74,9 +74,9 @@ int mmdeploy_text_recognizer_create_impl(ModelType &&m, const char *device_name, return MM_SUCCESS; } catch (const std::exception &e) { - ERROR("exception caught: {}", e.what()); + MMDEPLOY_ERROR("exception caught: {}", e.what()); } catch (...) { - ERROR("unknown exception caught"); + MMDEPLOY_ERROR("unknown exception caught"); } return MM_E_FAIL; } @@ -178,9 +178,9 @@ int mmdeploy_text_recognizer_apply_bbox(mm_handle_t handle, const mm_mat_t *imag return MM_SUCCESS; } catch (const std::exception &e) { - ERROR("exception caught: {}", e.what()); + MMDEPLOY_ERROR("exception caught: {}", e.what()); } catch (...) 
{ - ERROR("unknown exception caught"); + MMDEPLOY_ERROR("unknown exception caught"); } return MM_E_FAIL; } diff --git a/csrc/apis/c/text_recognizer.h b/csrc/apis/c/text_recognizer.h index d5bbd5e1ca..e257da5833 100644 --- a/csrc/apis/c/text_recognizer.h +++ b/csrc/apis/c/text_recognizer.h @@ -11,6 +11,10 @@ #include "common.h" #include "text_detector.h" +#ifdef __cplusplus +extern "C" { +#endif + typedef struct mm_text_recognize_t { char* text; float* score; @@ -27,8 +31,8 @@ typedef struct mm_text_recognize_t { * by \ref mmdeploy_text_recognizer_destroy * @return status code of the operation */ -MM_SDK_API int mmdeploy_text_recognizer_create(mm_model_t model, const char* device_name, - int device_id, mm_handle_t* handle); +MMDEPLOY_API int mmdeploy_text_recognizer_create(mm_model_t model, const char* device_name, + int device_id, mm_handle_t* handle); /** * @brief Create a text recognizer instance @@ -39,9 +43,9 @@ MM_SDK_API int mmdeploy_text_recognizer_create(mm_model_t model, const char* dev * by \ref mmdeploy_text_recognizer_destroy * @return status code of the operation */ -MM_SDK_API int mmdeploy_text_recognizer_create_by_path(const char* model_path, - const char* device_name, int device_id, - mm_handle_t* handle); +MMDEPLOY_API int mmdeploy_text_recognizer_create_by_path(const char* model_path, + const char* device_name, int device_id, + mm_handle_t* handle); /** * @brief Apply text recognizer to a batch of text images @@ -53,8 +57,8 @@ MM_SDK_API int mmdeploy_text_recognizer_create_by_path(const char* model_path, * by \ref mmdeploy_text_recognizer_release_result * @return status code of the operation */ -MM_SDK_API int mmdeploy_text_recognizer_apply(mm_handle_t handle, const mm_mat_t* images, int count, - mm_text_recognize_t** results); +MMDEPLOY_API int mmdeploy_text_recognizer_apply(mm_handle_t handle, const mm_mat_t* images, + int count, mm_text_recognize_t** results); /** * @brief Apply text recognizer to a batch of images supplied with text bboxes @@ -68,23 +72,28 @@ MM_SDK_API int mmdeploy_text_recognizer_apply(mm_handle_t handle, const mm_mat_t * bboxes, must be release by \ref mmdeploy_text_recognizer_release_result * @return status code of the operation */ -MM_SDK_API int mmdeploy_text_recognizer_apply_bbox(mm_handle_t handle, const mm_mat_t* images, - int image_count, const mm_text_detect_t* bboxes, - const int* bbox_count, - mm_text_recognize_t** results); +MMDEPLOY_API int mmdeploy_text_recognizer_apply_bbox(mm_handle_t handle, const mm_mat_t* images, + int image_count, + const mm_text_detect_t* bboxes, + const int* bbox_count, + mm_text_recognize_t** results); /** @brief Release result buffer returned by \ref mmdeploy_text_recognizer_apply or \ref * mmdeploy_text_recognizer_apply_bbox * @param[in] results result buffer by text recognizer * @param[in] count length of \p result */ -MM_SDK_API void mmdeploy_text_recognizer_release_result(mm_text_recognize_t* results, int count); +MMDEPLOY_API void mmdeploy_text_recognizer_release_result(mm_text_recognize_t* results, int count); /** * @brief destroy text recognizer * @param[in] handle handle of text recognizer created by \ref * mmdeploy_text_recognizer_create_by_path or \ref mmdeploy_text_recognizer_create */ -MM_SDK_API void mmdeploy_text_recognizer_destroy(mm_handle_t handle); +MMDEPLOY_API void mmdeploy_text_recognizer_destroy(mm_handle_t handle); + +#ifdef __cplusplus +} +#endif #endif // MMDEPLOY_SRC_APIS_C_TEXT_RECOGNIZER_H_ diff --git a/csrc/apis/python/CMakeLists.txt b/csrc/apis/python/CMakeLists.txt index 
1d98d84d4b..0730268f07 100644 --- a/csrc/apis/python/CMakeLists.txt +++ b/csrc/apis/python/CMakeLists.txt @@ -23,11 +23,10 @@ mmdeploy_python_add_module(restorer) pybind11_add_module(${PROJECT_NAME} ${MMDEPLOY_PYTHON_SRCS}) -target_link_libraries(${PROJECT_NAME} PRIVATE - ${MMDEPLOY_LIBS} - -Wl,--whole-archive ${MMDEPLOY_STATIC_MODULES} -Wl,--no-whole-archive - -Wl,--no-as-needed ${MMDEPLOY_DYNAMIC_MODULES} -Wl,--as-need) +mmdeploy_load_static(${PROJECT_NAME} MMDeployStaticModules) +mmdeploy_load_dynamic(${PROJECT_NAME} MMDeployDynamicModules) +target_link_libraries(${PROJECT_NAME} PRIVATE MMDeployLibs) target_include_directories(${PROJECT_NAME} PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR} - ${CMAKE_CURRENT_SOURCE_DIR}/../..) + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/../..) diff --git a/csrc/archive/CMakeLists.txt b/csrc/archive/CMakeLists.txt index 2b34ab1bb8..2645974152 100644 --- a/csrc/archive/CMakeLists.txt +++ b/csrc/archive/CMakeLists.txt @@ -6,7 +6,6 @@ add_library(${PROJECT_NAME} INTERFACE) target_link_libraries(${PROJECT_NAME} INTERFACE mmdeploy::core) add_library(mmdeploy::archive ALIAS mmdeploy_archive) -export_target(${PROJECT_NAME}) install(DIRECTORY ${CMAKE_SOURCE_DIR}/src/archive DESTINATION include/cpp FILES_MATCHING PATTERN "*.h") diff --git a/csrc/archive/json_archive.h b/csrc/archive/json_archive.h index 31666d89be..6f137b9a7c 100644 --- a/csrc/archive/json_archive.h +++ b/csrc/archive/json_archive.h @@ -133,7 +133,7 @@ inline Value json_to_value(const nlohmann::json& json) { return value; } default: - ERROR("unsupported json type: {}", json.type_name()); + MMDEPLOY_ERROR("unsupported json type: {}", json.type_name()); return {}; } } diff --git a/csrc/archive/value_archive.h b/csrc/archive/value_archive.h index 8500cbc424..f4115b3cb0 100644 --- a/csrc/archive/value_archive.h +++ b/csrc/archive/value_archive.h @@ -114,6 +114,8 @@ inline T from_value(const Value& value) { return x; } +namespace detail { + inline void load(ValueInputArchive& archive, Value& v) { archive.native(v); } template , Value>::value, bool> = true> @@ -121,6 +123,8 @@ inline void save(ValueOutputArchive& archive, T&& v) { archive.native(std::forward(v)); } +} // namespace detail + } // namespace mmdeploy #endif // MMDEPLOY_SRC_ARCHIVE_VALUE_ARCHIVE_H_ diff --git a/csrc/backend_ops/CMakeLists.txt b/csrc/backend_ops/CMakeLists.txt index 91117d2dbf..1537bd97fd 100644 --- a/csrc/backend_ops/CMakeLists.txt +++ b/csrc/backend_ops/CMakeLists.txt @@ -1,26 +1,28 @@ -set(CMAKE_CXX_STANDARD 14) +if (NOT MSVC) + set(CMAKE_CXX_STANDARD 14) +endif () set(CMAKE_CXX_FLAGS_RELEASE "-O3") # build ONNXRUNTIME ops if ("ort" IN_LIST MMDEPLOY_TARGET_BACKENDS) - if (NOT DEFINED ONNXRUNTIME_DIR) - set(ONNXRUNTIME_DIR $ENV{ONNXRUNTIME_DIR}) - endif () - if (NOT ONNXRUNTIME_DIR) - message(FATAL_ERROR " ONNXRUNTIME_DIR is not found.") - else () - message(STATUS "Build ONNXRUNTIME custom ops.") - add_subdirectory(onnxruntime) - endif () + if (NOT DEFINED ONNXRUNTIME_DIR) + set(ONNXRUNTIME_DIR $ENV{ONNXRUNTIME_DIR}) + endif () + if (NOT ONNXRUNTIME_DIR) + message(FATAL_ERROR " ONNXRUNTIME_DIR is not found.") + else () + message(STATUS "Build ONNXRUNTIME custom ops.") + add_subdirectory(onnxruntime) + endif () endif () # build TensorRT ops if ("trt" IN_LIST MMDEPLOY_TARGET_BACKENDS) - if (NOT DEFINED TENSORRT_DIR) - set(TENSORRT_DIR $ENV{TENSORRT_DIR}) - endif () - message(STATUS "Build TensorRT custom ops.") - add_subdirectory(tensorrt) + if (NOT DEFINED TENSORRT_DIR) + set(TENSORRT_DIR $ENV{TENSORRT_DIR}) + endif () 
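# Aside on backend selection (illustrative flags, not part of this patch):
# every block in this file is gated on a name in MMDEPLOY_TARGET_BACKENDS, so
# a configure line such as
#   cmake -DMMDEPLOY_TARGET_BACKENDS="ort;trt;torchscript" \
#         -DONNXRUNTIME_DIR=/opt/onnxruntime -DTENSORRT_DIR=/opt/tensorrt ..
# only descends into the ops subdirectories of the backends that are listed.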
+ message(STATUS "Build TensorRT custom ops.") + add_subdirectory(tensorrt) endif () # build NCNN ops @@ -28,3 +30,9 @@ if ("ncnn" IN_LIST MMDEPLOY_TARGET_BACKENDS) message(STATUS "Build NCNN custom ops") add_subdirectory(ncnn) endif () + +# build TorchScript ops +if ("torchscript" IN_LIST MMDEPLOY_TARGET_BACKENDS) + message(STATUS "Build torchsciprt custom ops") + add_subdirectory(torchscript) +endif () diff --git a/csrc/backend_ops/common/modulated_deform_conv/common_cuda_helper.cuh b/csrc/backend_ops/common/modulated_deform_conv/common_cuda_helper.cuh new file mode 100644 index 0000000000..02c57c62e6 --- /dev/null +++ b/csrc/backend_ops/common/modulated_deform_conv/common_cuda_helper.cuh @@ -0,0 +1,94 @@ +// Copyright (c) OpenMMLab. All rights reserved. +#ifndef COMMON_CUDA_HELPER +#define COMMON_CUDA_HELPER + +#include +#include + +#include + +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); i += blockDim.x * gridDim.x) + +#define THREADS_PER_BLOCK 512 + +#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) +inline int GET_BLOCKS(const int N) { + int optimal_block_num = DIVUP(N, THREADS_PER_BLOCK); + int max_block_num = 4096; + return std::min(optimal_block_num, max_block_num); +} + +#define cudaCheckError() \ + { \ + cudaError_t e = cudaGetLastError(); \ + if (e != cudaSuccess) { \ + printf("Cuda failure %s:%d: '%s'\n", __FILE__, __LINE__, cudaGetErrorString(e)); \ + exit(0); \ + } \ + } + +/** + * Returns a view of the original tensor with its dimensions permuted. + * + * @param[out] dst pointer to the destination tensor + * @param[in] src pointer to the source tensor + * @param[in] src_size shape of the src tensor + * @param[in] permute The desired ordering of dimensions + * @param[in] src_dim dim of src tensor + * @param[in] stream cuda stream handle + */ +template +void memcpyPermute(scalar_t* dst, const scalar_t* src, int* src_size, int* permute, int src_dim, + cudaStream_t stream = 0); + +template +cublasStatus_t cublasGemmWrap(cublasHandle_t handle, cublasOperation_t transa, + cublasOperation_t transb, int m, int n, int k, const scalar_t* alpha, + const scalar_t* A, int lda, const scalar_t* B, int ldb, + const scalar_t* beta, scalar_t* C, int ldc); + +template +__device__ scalar_t bilinear_interpolate(const scalar_t* input, const int height, const int width, + scalar_t y, scalar_t x) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) return 0; + + if (y <= 0) y = 0; + if (x <= 0) x = 0; + + int y_low = (int)y; + int x_low = (int)x; + int y_high; + int x_high; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (scalar_t)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (scalar_t)x_low; + } else { + x_high = x_low + 1; + } + + scalar_t ly = y - y_low; + scalar_t lx = x - x_low; + scalar_t hy = 1. - ly, hx = 1. 
- lx; + // do bilinear interpolation + scalar_t v1 = input[y_low * width + x_low]; + scalar_t v2 = input[y_low * width + x_high]; + scalar_t v3 = input[y_high * width + x_low]; + scalar_t v4 = input[y_high * width + x_high]; + scalar_t w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + return val; +} + +#endif  // COMMON_CUDA_HELPER diff --git a/csrc/backend_ops/common/modulated_deform_conv/modulated_deform_conv_cpu.h b/csrc/backend_ops/common/modulated_deform_conv/modulated_deform_conv_cpu.h new file mode 100644 index 0000000000..a37e243109 --- /dev/null +++ b/csrc/backend_ops/common/modulated_deform_conv/modulated_deform_conv_cpu.h @@ -0,0 +1,82 @@ +#include <cmath> +#include <cstdint> + +template <typename T> +T bilinear_interpolate_2d(const T *src, const int64_t src_h, const int64_t src_w, const T h, + const T w) { + if (h <= -1 || src_h <= h || w <= -1 || src_w <= w) { + return 0; + } + + int64_t h_low = floor(h); + int64_t w_low = floor(w); + int64_t h_high = h_low + 1; + int64_t w_high = w_low + 1; + + T lh = h - h_low; + T lw = w - w_low; + T hh = 1 - lh; + T hw = 1 - lw; + + T v1 = 0; + if (h_low >= 0 && w_low >= 0) v1 = src[h_low * src_w + w_low]; + T v2 = 0; + if (h_low >= 0 && w_high <= src_w - 1) v2 = src[h_low * src_w + w_high]; + T v3 = 0; + if (h_high <= src_h - 1 && w_low >= 0) v3 = src[h_high * src_w + w_low]; + T v4 = 0; + if (h_high <= src_h - 1 && w_high <= src_w - 1) v4 = src[h_high * src_w + w_high]; + + T w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + + T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + return val; +} + +// output: (channels * kernel_h * kernel_w, dst_h * dst_w) +template <typename T> +void deformable_im2col_2d(const T *input, const T *offset, const T *mask, const int64_t src_h, + const int64_t src_w, const int64_t kernel_h, const int64_t kernel_w, + const int64_t pad_h, const int64_t pad_w, const int64_t stride_h, + const int64_t stride_w, const int64_t dilation_h, + const int64_t dilation_w, const int64_t channels, + const int64_t offset_groups, const int64_t dst_h, const int64_t dst_w, + const bool use_mask, T *columns) { + const int64_t workload = channels * dst_h * dst_w; + for (int64_t index = 0; index != workload; ++index) { + const int64_t ow = index % dst_w; + const int64_t oh = (index / dst_w) % dst_h; + const int64_t ic = index / (dst_w * dst_h); + const int64_t oc = ic * kernel_h * kernel_w; + + int64_t c_per_offset_grp = channels / offset_groups; + const int64_t grp_idx = ic / c_per_offset_grp; + + auto columns_ptr = columns + (oc * (dst_h * dst_w) + oh * dst_w + ow); + auto input_ptr = input + ic * (src_h * src_w); + auto offset_ptr = offset + grp_idx * 2 * kernel_h * kernel_w * dst_h * dst_w; + auto mask_ptr = mask; + if (use_mask) { + mask_ptr += grp_idx * kernel_h * kernel_w * dst_h * dst_w; + } + + for (int64_t kh = 0; kh < kernel_h; ++kh) { + for (int64_t kw = 0; kw < kernel_w; ++kw) { + const int64_t mask_idx = kh * kernel_w + kw; + const int64_t offset_idx = 2 * mask_idx; + + T mask_value = 1; + if (use_mask) { + mask_value = mask_ptr[mask_idx * (dst_h * dst_w) + oh * dst_w + ow]; + } + + const T offset_h = offset_ptr[offset_idx * (dst_h * dst_w) + oh * dst_w + ow]; + const T offset_w = offset_ptr[(offset_idx + 1) * (dst_h * dst_w) + oh * dst_w + ow]; + const T ih = (oh * stride_h - pad_h) + kh * dilation_h + offset_h; + const T iw = (ow * stride_w - pad_w) + kw * dilation_w + offset_w; + *columns_ptr = mask_value * bilinear_interpolate_2d(input_ptr, src_h, src_w, ih, iw); + columns_ptr +=
dst_h * dst_w; + } + } + } +} diff --git a/csrc/backend_ops/tensorrt/modulated_deform_conv/trt_modulated_deform_conv_kernel.hpp b/csrc/backend_ops/common/modulated_deform_conv/modulated_deform_conv_cuda.cuh similarity index 99% rename from csrc/backend_ops/tensorrt/modulated_deform_conv/trt_modulated_deform_conv_kernel.hpp rename to csrc/backend_ops/common/modulated_deform_conv/modulated_deform_conv_cuda.cuh index 2d78998a4d..3f4b2a55ce 100644 --- a/csrc/backend_ops/tensorrt/modulated_deform_conv/trt_modulated_deform_conv_kernel.hpp +++ b/csrc/backend_ops/common/modulated_deform_conv/modulated_deform_conv_cuda.cuh @@ -68,7 +68,7 @@ #include -#include "common_cuda_helper.hpp" +#include "common_cuda_helper.cuh" template __device__ T dmcn_im2col_bilinear(const T *input, const int data_width, const int height, diff --git a/csrc/backend_ops/ncnn/CMakeLists.txt b/csrc/backend_ops/ncnn/CMakeLists.txt index 6345448e6a..9580d3b96c 100755 --- a/csrc/backend_ops/ncnn/CMakeLists.txt +++ b/csrc/backend_ops/ncnn/CMakeLists.txt @@ -4,21 +4,20 @@ cmake_minimum_required(VERSION 3.14) # ncnn find_package(ncnn) -if(ncnn_FOUND) - message(STATUS "ncnn library found!") -else() - message(FATAL_ERROR "Could not locate ncnn") -endif() +if (ncnn_FOUND) + message(STATUS "ncnn library found!") +else () + message(FATAL_ERROR "Could not locate ncnn") +endif () -set_targets(mmdeploy_ncnn_ops NCNN_OPS_OBJ NCNN_OPS_STATIC NCNN_OPS_SHARED) -if(NOT ANDROID AND NOT IOS) - add_subdirectory(ops) - add_subdirectory(onnx2ncnn) - add_subdirectory(pyncnn_ext) -else() - # In case of embedded platform, like android, or ios, we only build custom ncnn - # ops, and leave the executable converter(onnx2ncnn, pyncnn_ext) built under - # the host platforms - add_subdirectory(ops) -endif() +if (NOT ANDROID AND NOT IOS) + add_subdirectory(ops) + add_subdirectory(onnx2ncnn) + add_subdirectory(pyncnn_ext) +else () + # In case of embedded platform, like android, or ios, we only build custom ncnn + # ops, and leave the executable converter(onnx2ncnn, pyncnn_ext) built under + # the host platforms + add_subdirectory(ops) +endif () diff --git a/csrc/backend_ops/ncnn/ops/CMakeLists.txt b/csrc/backend_ops/ncnn/ops/CMakeLists.txt index aa89729843..461301211a 100755 --- a/csrc/backend_ops/ncnn/ops/CMakeLists.txt +++ b/csrc/backend_ops/ncnn/ops/CMakeLists.txt @@ -2,18 +2,24 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_ncnn_ops) -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) # add plugin source file(GLOB_RECURSE NCNN_OPS_SRCS *.cpp) -build_object_target(${NCNN_OPS_OBJ} "${NCNN_OPS_SRCS}") -target_link_libraries(${NCNN_OPS_OBJ} ncnn) +add_library(${PROJECT_NAME}_obj OBJECT "${NCNN_OPS_SRCS}") +target_compile_definitions(${PROJECT_NAME}_obj PRIVATE -DMMDEPLOY_API_EXPORTS=1) +set_target_properties(${PROJECT_NAME}_obj PROPERTIES POSITION_INDEPENDENT_CODE 1) +target_link_libraries(${PROJECT_NAME}_obj PRIVATE ncnn) +set(_COMMON_INCLUDE_DIRS + $ + $) +target_include_directories(${PROJECT_NAME}_obj + PUBLIC ${_COMMON_INCLUDE_DIRS}) +mmdeploy_export(${PROJECT_NAME}_obj) -build_shared_target(${NCNN_OPS_SHARED} ${NCNN_OPS_OBJ} "PRIVATE") -install_targets(${NCNN_OPS_SHARED}) +mmdeploy_add_library(${PROJECT_NAME} SHARED EXCLUDE "") +target_link_libraries(${PROJECT_NAME} PRIVATE ${PROJECT_NAME}_obj) +target_include_directories(${PROJECT_NAME} + PUBLIC ${_COMMON_INCLUDE_DIRS}) -if (MMDEPLOY_BUILD_SDK) - ## Build static library. 
SDK's uses it to build `ncnn_net` module - build_static_target(${NCNN_OPS_STATIC} ${NCNN_OPS_OBJ} "PRIVATE") - add_library(mmdeploy::ncnn_ops::static ALIAS ${NCNN_OPS_STATIC}) -endif () +add_library(mmdeploy::ncnn_ops ALIAS ${PROJECT_NAME}) diff --git a/csrc/backend_ops/ncnn/ops/ncnn_ops_register.h b/csrc/backend_ops/ncnn/ops/ncnn_ops_register.h index 2fb07d8b0f..333f174e79 100755 --- a/csrc/backend_ops/ncnn/ops/ncnn_ops_register.h +++ b/csrc/backend_ops/ncnn/ops/ncnn_ops_register.h @@ -5,13 +5,12 @@ #include #include +#include "core/macro.h" #include "net.h" -extern "C" { -std::map& get_mmdeploy_layer_creator(); -std::map& get_mmdeploy_layer_destroyer(); +MMDEPLOY_API std::map& get_mmdeploy_layer_creator(); +MMDEPLOY_API std::map& get_mmdeploy_layer_destroyer(); -int register_mmdeploy_custom_layers(ncnn::Net& net); -} +MMDEPLOY_API int register_mmdeploy_custom_layers(ncnn::Net& net); #endif diff --git a/csrc/backend_ops/ncnn/pyncnn_ext/CMakeLists.txt b/csrc/backend_ops/ncnn/pyncnn_ext/CMakeLists.txt index f0d4148963..c55acb488f 100755 --- a/csrc/backend_ops/ncnn/pyncnn_ext/CMakeLists.txt +++ b/csrc/backend_ops/ncnn/pyncnn_ext/CMakeLists.txt @@ -6,9 +6,12 @@ project(ncnn_ext) if (NOT TARGET pybind11) add_subdirectory(${CMAKE_SOURCE_DIR}/third_party/pybind11 pybind11) endif () -include_directories(${pybind11_INCLUDE_DIR} ${PYTHON_INCLUDE_DIRS}) + pybind11_add_module(ncnn_ext ncnn_ext.cpp) -target_link_libraries(ncnn_ext PUBLIC ncnn ${NCNN_OPS_SHARED}) -set_target_properties( - ncnn_ext PROPERTIES LIBRARY_OUTPUT_DIRECTORY - ${CMAKE_SOURCE_DIR}/mmdeploy/backend/ncnn) + +target_link_libraries(ncnn_ext PUBLIC mmdeploy_ncnn_ops ncnn) +set(_NCNN_EXT_DIR ${CMAKE_SOURCE_DIR}/mmdeploy/backend/ncnn) +set_target_properties(ncnn_ext PROPERTIES + LIBRARY_OUTPUT_DIRECTORY ${_NCNN_EXT_DIR} + LIBRARY_OUTPUT_DIRECTORY_DEBUG ${_NCNN_EXT_DIR} + LIBRARY_OUTPUT_DIRECTORY_RELEASE ${_NCNN_EXT_DIR}) diff --git a/csrc/backend_ops/ncnn/pyncnn_ext/ncnn_ext.cpp b/csrc/backend_ops/ncnn/pyncnn_ext/ncnn_ext.cpp index e9ff04eb41..ac158b9edb 100755 --- a/csrc/backend_ops/ncnn/pyncnn_ext/ncnn_ext.cpp +++ b/csrc/backend_ops/ncnn/pyncnn_ext/ncnn_ext.cpp @@ -1,7 +1,7 @@ // Copyright (c) OpenMMLab. All rights reserved. 
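// Usage sketch for the registration API declared in ncnn_ops_register.h above
// (hypothetical host-application code with made-up model file names; only
// register_mmdeploy_custom_layers() and the ncnn calls are real):
//   ncnn::Net net;
//   register_mmdeploy_custom_layers(net);  // must run before load_param()
//   net.load_param("end2end.param");
//   net.load_model("end2end.bin");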
#include -#include "../ops/ncnn_ops_register.h" +#include "ncnn_ops_register.h" #include "net.h" PYBIND11_MODULE(ncnn_ext, m) { diff --git a/csrc/backend_ops/onnxruntime/CMakeLists.txt b/csrc/backend_ops/onnxruntime/CMakeLists.txt index f646bbc98a..b136781401 100644 --- a/csrc/backend_ops/onnxruntime/CMakeLists.txt +++ b/csrc/backend_ops/onnxruntime/CMakeLists.txt @@ -2,26 +2,24 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_onnxruntime_ops) -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) -set_targets(${PROJECT_NAME} ORT_OPS_OBJ ORT_OPS_STATIC ORT_OPS_MODULE) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) # add plugin source file(GLOB_RECURSE ORT_OPS_SRCS *.cpp) -build_object_target(${ORT_OPS_OBJ} "${ORT_OPS_SRCS}") -target_include_directories(${ORT_OPS_OBJ} PUBLIC +add_library(${PROJECT_NAME}_obj OBJECT "${ORT_OPS_SRCS}") +target_compile_definitions(${PROJECT_NAME}_obj PRIVATE -DMMDEPLOY_API_EXPORTS=1) +set_target_properties(${PROJECT_NAME}_obj PROPERTIES POSITION_INDEPENDENT_CODE 1) +mmdeploy_export(${PROJECT_NAME}_obj) + +target_include_directories(${PROJECT_NAME}_obj PUBLIC $ - $) -target_link_directories(${ORT_OPS_OBJ} PUBLIC + $ + $ + $) +target_link_directories(${PROJECT_NAME}_obj PUBLIC ${ONNXRUNTIME_DIR}/lib) -target_link_libraries(${ORT_OPS_OBJ} PUBLIC onnxruntime) - -add_library(${ORT_OPS_MODULE} MODULE $) -target_link_libraries(${ORT_OPS_MODULE} PRIVATE ${ORT_OPS_OBJ}) -add_library(mmdeploy::onnxruntime::ops ALIAS ${ORT_OPS_MODULE}) -install_targets(${ORT_OPS_MODULE}) +target_link_libraries(${PROJECT_NAME}_obj PUBLIC onnxruntime) -if (MMDEPLOY_BUILD_SDK) - ## Build static library. SDK's uses it to build `ort_net` module - build_static_target(${ORT_OPS_STATIC} ${ORT_OPS_OBJ} "PRIVATE") - add_library(mmdeploy::onnxruntime::ops::static ALIAS ${ORT_OPS_STATIC}) -endif () +mmdeploy_add_library(${PROJECT_NAME} SHARED EXCLUDE "") +target_link_libraries(${PROJECT_NAME} PUBLIC ${PROJECT_NAME}_obj) +add_library(mmdeploy::onnxruntime::ops ALIAS ${PROJECT_NAME}) diff --git a/csrc/backend_ops/onnxruntime/common/onnxruntime_register.h b/csrc/backend_ops/onnxruntime/common/onnxruntime_register.h index 84318bc818..344031e791 100644 --- a/csrc/backend_ops/onnxruntime/common/onnxruntime_register.h +++ b/csrc/backend_ops/onnxruntime/common/onnxruntime_register.h @@ -3,11 +3,14 @@ #define ONNXRUNTIME_REGISTER_H #include +#include "core/macro.h" + #ifdef __cplusplus extern "C" { #endif -OrtStatus *ORT_API_CALL RegisterCustomOps(OrtSessionOptions *options, const OrtApiBase *api); +MMDEPLOY_API OrtStatus *ORT_API_CALL RegisterCustomOps(OrtSessionOptions *options, + const OrtApiBase *api); #ifdef __cplusplus } diff --git a/csrc/backend_ops/onnxruntime/modulated_deform_conv/modulated_deform_conv.cpp b/csrc/backend_ops/onnxruntime/modulated_deform_conv/modulated_deform_conv.cpp index 5561752cd6..3df1217a37 100644 --- a/csrc/backend_ops/onnxruntime/modulated_deform_conv/modulated_deform_conv.cpp +++ b/csrc/backend_ops/onnxruntime/modulated_deform_conv/modulated_deform_conv.cpp @@ -4,88 +4,11 @@ #include #include +#include "modulated_deform_conv/modulated_deform_conv_cpu.h" #include "ort_utils.h" namespace mmdeploy { -float bilinear_interpolate_2d(const float *src, const int64_t src_h, const int64_t src_w, - const float h, const float w) { - if (h <= -1 || src_h <= h || w <= -1 || src_w <= w) { - return 0; - } - - int64_t h_low = floor(h); - int64_t w_low = floor(w); - int64_t h_high = h_low + 1; - int64_t w_high = w_low + 1; - - float lh = h - h_low; - float lw = w - w_low; - float hh = 1 
- lh; - float hw = 1 - lw; - - float v1 = 0; - if (h_low >= 0 && w_low >= 0) v1 = src[h_low * src_w + w_low]; - float v2 = 0; - if (h_low >= 0 && w_high <= src_w - 1) v2 = src[h_low * src_w + w_high]; - float v3 = 0; - if (h_high <= src_h - 1 && w_low >= 0) v3 = src[h_high * src_w + w_low]; - float v4 = 0; - if (h_high <= src_h - 1 && w_high <= src_w - 1) v4 = src[h_high * src_w + w_high]; - - float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; - - float val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - return val; -} - -// output: (channels * kernel_h * kernel_w, dst_h * dst_w) -void deformable_im2col_2d(const float *input, const float *offset, const float *mask, - const int64_t src_h, const int64_t src_w, const int64_t kernel_h, - const int64_t kernel_w, const int64_t pad_h, const int64_t pad_w, - const int64_t stride_h, const int64_t stride_w, const int64_t dilation_h, - const int64_t dilation_w, const int64_t channels, - const int64_t offset_groups, const int64_t dst_h, const int64_t dst_w, - const bool use_mask, float *columns) { - const int64_t workload = channels * dst_h * dst_w; - for (int64_t index = 0; index != workload; ++index) { - const int64_t ow = index % dst_w; - const int64_t oh = (index / dst_w) % dst_h; - const int64_t ic = index / (dst_w * dst_h); - const int64_t oc = ic * kernel_h * kernel_w; - - int64_t c_per_offset_grp = channels / offset_groups; - const int64_t grp_idx = ic / c_per_offset_grp; - - auto columns_ptr = columns + (oc * (dst_h * dst_w) + oh * dst_w + ow); - auto input_ptr = input + ic * (src_h * src_w); - auto offset_ptr = offset + grp_idx * 2 * kernel_h * kernel_w * dst_h * dst_w; - auto mask_ptr = mask; - if (use_mask) { - mask_ptr += grp_idx * kernel_h * kernel_w * dst_h * dst_w; - } - - for (int64_t kh = 0; kh < kernel_h; ++kh) { - for (int64_t kw = 0; kw < kernel_w; ++kw) { - const int64_t mask_idx = kh * kernel_w + kw; - const int64_t offset_idx = 2 * mask_idx; - - float mask_value = 1; - if (use_mask) { - mask_value = mask_ptr[mask_idx * (dst_h * dst_w) + oh * dst_w + ow]; - } - - const float offset_h = offset_ptr[offset_idx * (dst_h * dst_w) + oh * dst_w + ow]; - const float offset_w = offset_ptr[(offset_idx + 1) * (dst_h * dst_w) + oh * dst_w + ow]; - const float ih = (oh * stride_h - pad_h) + kh * dilation_h + offset_h; - const float iw = (ow * stride_w - pad_w) + kw * dilation_w + offset_w; - *columns_ptr = mask_value * bilinear_interpolate_2d(input_ptr, src_h, src_w, ih, iw); - columns_ptr += dst_h * dst_w; - } - } - } -} - void gemm_ref_fp32(const float *A, const float *B, const float *V, const float *H, const int32_t trans_A, const int32_t trans_B, const int32_t M, const int32_t N, const int32_t K, const float alpha, const float beta, float *Y) { @@ -162,12 +85,12 @@ void deformable_conv2d_ref_fp32(const float *src, const float *offset, const flo for (int64_t b = 0; b < batch; ++b) { for (int64_t g = 0; g < group; ++g) { - deformable_im2col_2d(src + b * src_c * src_h * src_w + g * ic_per_gp * src_h * src_w, - offset + b * offset_group * 2 * kernel_h * kernel_w * dst_h * dst_w, - mask + b * offset_group * kernel_h * kernel_w * dst_h * dst_w, src_h, - src_w, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, - dilation_w, ic_per_gp, offset_group, dst_h, dst_w, mask != nullptr, - columns); + deformable_im2col_2d( + src + b * src_c * src_h * src_w + g * ic_per_gp * src_h * src_w, + offset + b * offset_group * 2 * kernel_h * kernel_w * dst_h * dst_w, + mask + b * offset_group * kernel_h * kernel_w * dst_h * dst_w, src_h, 
src_w, kernel_h, + kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, ic_per_gp, + offset_group, dst_h, dst_w, mask != nullptr, columns); float *dst_ptr = dst + b * dst_c * dst_h * dst_w + g * oc_per_gp * dst_h * dst_w; if (bias != nullptr) { const float *bias_ptr = bias + g * oc_per_gp; diff --git a/csrc/backend_ops/onnxruntime/onnxruntime_register.cpp b/csrc/backend_ops/onnxruntime/onnxruntime_register.cpp index 9f2ce2cc0f..f7b9cedff8 100644 --- a/csrc/backend_ops/onnxruntime/onnxruntime_register.cpp +++ b/csrc/backend_ops/onnxruntime/onnxruntime_register.cpp @@ -7,7 +7,6 @@ const char *c_MMDeployOpDomain = "mmdeploy"; OrtStatus *ORT_API_CALL RegisterCustomOps(OrtSessionOptions *options, const OrtApiBase *api) { const OrtApi *kOrtApi = api->GetApi(ORT_API_VERSION); - OrtStatus *status = nullptr; for (auto &_op_list_pair : mmdeploy::get_mmdeploy_custom_ops()) { OrtCustomOpDomain *domain = nullptr; diff --git a/csrc/backend_ops/tensorrt/CMakeLists.txt b/csrc/backend_ops/tensorrt/CMakeLists.txt index 88a0176df6..626fb6c92a 100644 --- a/csrc/backend_ops/tensorrt/CMakeLists.txt +++ b/csrc/backend_ops/tensorrt/CMakeLists.txt @@ -3,41 +3,37 @@ cmake_minimum_required(VERSION 3.14) include(${CMAKE_SOURCE_DIR}/cmake/cuda.cmake NO_POLICY_SCOPE) project(mmdeploy_tensorrt_ops CUDA CXX) -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) include(${CMAKE_SOURCE_DIR}/cmake/cuda.cmake NO_POLICY_SCOPE) include(${CMAKE_SOURCE_DIR}/cmake/tensorrt.cmake NO_POLICY_SCOPE) -set_targets(${PROJECT_NAME} BACKEND_OPS_OBJ BACKEND_OPS_STATIC BACKEND_OPS_MODULE) - # cub if (NOT DEFINED CUB_ROOT_DIR) if (CUDA_VERSION VERSION_LESS 11.0) set(CUB_ROOT_DIR "${CMAKE_SOURCE_DIR}/third_party/cub") - endif() + endif () endif () file(GLOB_RECURSE BACKEND_OPS_SRCS *.cpp *.cu) -build_object_target(${BACKEND_OPS_OBJ} "${BACKEND_OPS_SRCS}") -target_compile_definitions(${BACKEND_OPS_OBJ} +add_library(${PROJECT_NAME}_obj OBJECT "${BACKEND_OPS_SRCS}") +set_target_properties(${PROJECT_NAME}_obj PROPERTIES POSITION_INDEPENDENT_CODE 1) +target_compile_definitions(${PROJECT_NAME}_obj PRIVATE -DTHRUST_IGNORE_DEPRECATED_CPP_DIALECT=1) -target_include_directories(${BACKEND_OPS_OBJ} +target_include_directories(${PROJECT_NAME}_obj + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) +target_include_directories(${PROJECT_NAME}_obj PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/common) -target_include_directories(${BACKEND_OPS_OBJ} +target_include_directories(${PROJECT_NAME}_obj PRIVATE ${CUDA_TOOLKIT_ROOT_DIR}/include) -target_include_directories(${BACKEND_OPS_OBJ} PRIVATE ${TENSORRT_INCLUDE_DIR}) -target_include_directories(${BACKEND_OPS_OBJ} PRIVATE ${CUDNN_DIR}/include) -target_include_directories(${BACKEND_OPS_OBJ} PRIVATE ${CUB_ROOT_DIR}) -target_link_directories(${BACKEND_OPS_OBJ} PUBLIC ${CUDNN_DIR}/lib64) -target_link_libraries(${BACKEND_OPS_OBJ} - PRIVATE ${TENSORRT_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} cudnn) +target_include_directories(${PROJECT_NAME}_obj PRIVATE ${TENSORRT_INCLUDE_DIR}) +target_include_directories(${PROJECT_NAME}_obj PRIVATE ${CUDNN_DIR}/include) +target_include_directories(${PROJECT_NAME}_obj PRIVATE ${CUB_ROOT_DIR}) +target_link_directories(${PROJECT_NAME}_obj PUBLIC ${CUDNN_DIR}/lib64 ${CUDNN_DIR}/lib/x64) +target_link_libraries(${PROJECT_NAME}_obj + PUBLIC ${TENSORRT_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} cudnn) +mmdeploy_export(${PROJECT_NAME}_obj) # Build module library. 
It is used to convert onnx model to tensorrt engine -build_module_target(${BACKEND_OPS_MODULE} ${BACKEND_OPS_OBJ} "PRIVATE") -add_library(mmdeploy::tensorrt_ops ALIAS ${BACKEND_OPS_MODULE}) -install_targets(${BACKEND_OPS_MODULE}) - -if (MMDEPLOY_BUILD_SDK) - ## Build static library. SDK's uses it to build `trt_net` module - build_static_target(${BACKEND_OPS_STATIC} ${BACKEND_OPS_OBJ} "PRIVATE") - add_library(mmdeploy::tensorrt_ops::static ALIAS ${BACKEND_OPS_STATIC}) -endif () +mmdeploy_add_module(${PROJECT_NAME} MODULE EXCLUDE "") +target_link_libraries(${PROJECT_NAME} PRIVATE ${PROJECT_NAME}_obj) +add_library(mmdeploy::tensorrt_ops ALIAS ${PROJECT_NAME}) diff --git a/csrc/backend_ops/tensorrt/modulated_deform_conv/trt_modulated_deform_conv_kernel.cu b/csrc/backend_ops/tensorrt/modulated_deform_conv/trt_modulated_deform_conv_kernel.cu index a5940b5e40..ed284e7809 100644 --- a/csrc/backend_ops/tensorrt/modulated_deform_conv/trt_modulated_deform_conv_kernel.cu +++ b/csrc/backend_ops/tensorrt/modulated_deform_conv/trt_modulated_deform_conv_kernel.cu @@ -3,7 +3,7 @@ #include #include "common_cuda_helper.hpp" -#include "trt_modulated_deform_conv_kernel.hpp" +#include "modulated_deform_conv/modulated_deform_conv_cuda.cuh" #include "trt_plugin_helper.hpp" template diff --git a/csrc/backend_ops/torchscript/CMakeLists.txt b/csrc/backend_ops/torchscript/CMakeLists.txt new file mode 100644 index 0000000000..e383129992 --- /dev/null +++ b/csrc/backend_ops/torchscript/CMakeLists.txt @@ -0,0 +1,5 @@ +# Copyright (c) OpenMMLab. All rights reserved. +cmake_minimum_required(VERSION 3.14) + +add_subdirectory(ops) +add_subdirectory(optimizer) diff --git a/csrc/backend_ops/torchscript/bind.cpp b/csrc/backend_ops/torchscript/bind.cpp new file mode 100644 index 0000000000..cfc08d1489 --- /dev/null +++ b/csrc/backend_ops/torchscript/bind.cpp @@ -0,0 +1,10 @@ +// Copyright (c) OpenMMLab. All rights reserved. +#include "torch/script.h" + +TORCH_LIBRARY(mmdeploy, m) { + m.def( + "modulated_deform_conv(Tensor input, Tensor weight, Tensor bias, Tensor offset, Tensor " + "mask, " + "int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, int pad_w, int " + "dilation_h,int dilation_w, int groups, int deform_groups, bool with_bias) -> Tensor"); +} diff --git a/csrc/backend_ops/torchscript/ops/CMakeLists.txt b/csrc/backend_ops/torchscript/ops/CMakeLists.txt new file mode 100644 index 0000000000..71c7256cd4 --- /dev/null +++ b/csrc/backend_ops/torchscript/ops/CMakeLists.txt @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
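# A note on bind.cpp above, based on the standard PyTorch registration API
# rather than anything stated in this patch: TORCH_LIBRARY(mmdeploy, m) only
# declares the modulated_deform_conv schema; the CPU and CUDA kernels attach
# to it later via TORCH_LIBRARY_IMPL in the op sources. Once the library
# built here is loaded, e.g. with
#   torch.ops.load_library("libmmdeploy_torchscript_ops.so")  # name assumed
# the op is callable as torch.ops.mmdeploy.modulated_deform_conv(...).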
+cmake_minimum_required(VERSION 3.14) + +if("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES) + project(mmdeploy_torchscript_ops CUDA CXX) + include(${CMAKE_SOURCE_DIR}/cmake/cuda.cmake NO_POLICY_SCOPE) + file(GLOB_RECURSE BACKEND_OPS_SRCS *.cpp *.cu) +else() + project(mmdeploy_torchscript_ops CXX) + file(GLOB_RECURSE BACKEND_OPS_SRCS *.cpp) +endif() + +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) +find_package(Torch REQUIRED) + + +add_library(${PROJECT_NAME}_obj OBJECT "${BACKEND_OPS_SRCS}") +set_target_properties(${PROJECT_NAME}_obj PROPERTIES POSITION_INDEPENDENT_CODE 1) +target_compile_definitions(${PROJECT_NAME}_obj + PRIVATE -DTHRUST_IGNORE_DEPRECATED_CPP_DIALECT=1) +target_include_directories(${PROJECT_NAME}_obj + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../common) +target_include_directories(${PROJECT_NAME}_obj + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/common) + +if("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES) + target_include_directories(${PROJECT_NAME}_obj + PRIVATE ${CUDA_TOOLKIT_ROOT_DIR}/include) +endif() +target_link_libraries(${PROJECT_NAME}_obj PRIVATE ${TORCH_LIBRARIES}) +mmdeploy_export(${PROJECT_NAME}_obj) + +# Build module library. It is used to inference with torchscript +mmdeploy_add_module(${PROJECT_NAME} MODULE EXCLUDE "") +target_link_libraries(${PROJECT_NAME} PUBLIC ${PROJECT_NAME}_obj) +add_library(mmdeploy::torchscript_ops ALIAS ${PROJECT_NAME}) diff --git a/csrc/backend_ops/torchscript/ops/modulated_deform_conv/modulated_deform_conv_cpu.cpp b/csrc/backend_ops/torchscript/ops/modulated_deform_conv/modulated_deform_conv_cpu.cpp new file mode 100644 index 0000000000..c6d980919f --- /dev/null +++ b/csrc/backend_ops/torchscript/ops/modulated_deform_conv/modulated_deform_conv_cpu.cpp @@ -0,0 +1,94 @@ +// Copyright (c) OpenMMLab. All rights reserved. 
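// The forward pass in this file follows the usual im2col + GEMM
// decomposition: per batch element, deformable_im2col_2d() gathers
// offset-shifted, mask-weighted samples of the input into `columns`, then one
// addmm_ per group multiplies the flattened weights against those columns.
// Worked size check (numbers are our own, not from the patch): input
// 1x64x56x56 with a 3x3 kernel, stride 1, pad 1, dilation 1 gives
//   height_out = (56 + 2*1 - (1*(3-1) + 1)) / 1 + 1 = 56
// so with group == 1 the columns buffer is (64*3*3) x (56*56) elements.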
+#include "modulated_deform_conv/modulated_deform_conv_cpu.h" + +#include "torch/script.h" + +namespace mmdeploy { + +void modulated_deformable_im2col_cpu( + const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask, + const int64_t batch_size, const int64_t channels, const int64_t height_im, + const int64_t width_im, const int64_t height_col, const int64_t width_col, + const int64_t kernel_h, const int64_t kernel_w, const int64_t pad_h, const int64_t pad_w, + const int64_t stride_h, const int64_t stride_w, const int64_t dilation_h, + const int64_t dilation_w, int64_t deformable_group, at::Tensor data_col) { + // num_axes should be smaller than block size + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_im.scalar_type(), "modulated_deformable_im2col_cpu", ([&] { + const scalar_t *data_im_ = data_im.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + const scalar_t *data_mask_ = data_mask.data_ptr(); + scalar_t *data_col_ = data_col.data_ptr(); + + deformable_im2col_2d(data_im_, data_offset_, data_mask_, height_im, width_im, + kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, channels, deformable_group, + height_col, width_col, data_mask_ != nullptr, data_col_); + })); +} + +at::Tensor modulated_deform_conv_forward_cpu(at::Tensor input, at::Tensor weight, at::Tensor bias, + at::Tensor offset, at::Tensor mask, int64_t kernel_h, + int64_t kernel_w, int64_t stride_h, int64_t stride_w, + int64_t pad_h, int64_t pad_w, int64_t dilation_h, + int64_t dilation_w, int64_t group, + int64_t deformable_group, bool with_bias) { + at::DeviceGuard guard(input.device()); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_out = weight.size(0); + const int channels_kernel = weight.size(1); + const int kernel_h_ = weight.size(2); + const int kernel_w_ = weight.size(3); + + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).", kernel_h_, kernel_w, + kernel_h_, kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).", channels, + channels_kernel * group); + + const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + // resize output + at::Tensor output = + at::zeros({batch, group, channels_out / group, height_out, width_out}, input.options()); + // resize temporary columns + at::Tensor columns = at::zeros( + {group, channels * kernel_h * kernel_w / group, 1 * height_out * width_out}, input.options()); + + // divide into group + weight = + weight.view({group, weight.size(0) / group, weight.size(1), weight.size(2), weight.size(3)}); + for (int b = 0; b < batch; b++) { + modulated_deformable_im2col_cpu(input[b], offset[b], mask[b], 1, channels, height, width, + height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, + stride_h, stride_w, dilation_h, dilation_w, deformable_group, + columns); + + for (int g = 0; g < group; g++) { + output[b][g] = + output[b][g].flatten(1).addmm_(weight[g].flatten(1), columns[g]).view_as(output[b][g]); + } + } + + output = output.view( + {output.size(0), output.size(1) * output.size(2), output.size(3), output.size(4)}); + + if (with_bias) { + output += bias.view({1, bias.size(0), 1, 1}); + } + + return output; +} + 
+TORCH_LIBRARY_IMPL(mmdeploy, CPU, m) { + m.impl("modulated_deform_conv", modulated_deform_conv_forward_cpu); +} +} // namespace mmdeploy diff --git a/csrc/backend_ops/torchscript/ops/modulated_deform_conv/modulated_deform_conv_cuda.cu b/csrc/backend_ops/torchscript/ops/modulated_deform_conv/modulated_deform_conv_cuda.cu new file mode 100644 index 0000000000..3f9b6aef08 --- /dev/null +++ b/csrc/backend_ops/torchscript/ops/modulated_deform_conv/modulated_deform_conv_cuda.cu @@ -0,0 +1,97 @@ +// Copyright (c) OpenMMLab. All rights reserved. +#include "c10/cuda/CUDAStream.h" +#include "modulated_deform_conv/modulated_deform_conv_cuda.cuh" +#include "torch/script.h" + +namespace mmdeploy { + +void modulated_deformable_im2col_cuda( + const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask, + const int64_t batch_size, const int64_t channels, const int64_t height_im, + const int64_t width_im, const int64_t height_col, const int64_t width_col, + const int64_t kernel_h, const int64_t kernel_w, const int64_t pad_h, const int64_t pad_w, + const int64_t stride_h, const int64_t stride_w, const int64_t dilation_h, + const int64_t dilation_w, const int64_t deformable_group, at::Tensor data_col) { + // num_axes should be smaller than block size + const int channel_per_deformable_group = channels / deformable_group; + const int num_kernels = channels * batch_size * height_col * width_col; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_im.scalar_type(), "modulated_deformable_im2col_cuda", ([&] { + const scalar_t *data_im_ = data_im.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + const scalar_t *data_mask_ = data_mask.data_ptr(); + scalar_t *data_col_ = data_col.data_ptr(); + modulated_deformable_im2col_gpu_kernel + <<>>( + num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, + kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, + channel_per_deformable_group, batch_size, channels, deformable_group, height_col, + width_col, data_col_); + })); +} + +at::Tensor modulated_deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, at::Tensor bias, + at::Tensor offset, at::Tensor mask, int64_t kernel_h, + int64_t kernel_w, int64_t stride_h, int64_t stride_w, + int64_t pad_h, int64_t pad_w, int64_t dilation_h, + int64_t dilation_w, int64_t group, + int64_t deformable_group, bool with_bias) { + at::DeviceGuard guard(input.device()); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_out = weight.size(0); + const int channels_kernel = weight.size(1); + const int kernel_h_ = weight.size(2); + const int kernel_w_ = weight.size(3); + + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).", kernel_h_, kernel_w, + kernel_h_, kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).", channels, + channels_kernel * group); + + const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + // resize output + at::Tensor output = + at::zeros({batch, group, channels_out / group, height_out, width_out}, input.options()); + // resize temporary columns + at::Tensor columns = at::zeros( + {group, channels * kernel_h * kernel_w / group, 1 * height_out 
* width_out}, input.options()); + + // divide into group + weight = + weight.view({group, weight.size(0) / group, weight.size(1), weight.size(2), weight.size(3)}); + for (int b = 0; b < batch; b++) { + modulated_deformable_im2col_cuda(input[b], offset[b], mask[b], 1, channels, height, width, + height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, + stride_h, stride_w, dilation_h, dilation_w, deformable_group, + columns); + + for (int g = 0; g < group; g++) { + output[b][g] = + output[b][g].flatten(1).addmm_(weight[g].flatten(1), columns[g]).view_as(output[b][g]); + } + } + + output = output.view( + {output.size(0), output.size(1) * output.size(2), output.size(3), output.size(4)}); + + if (with_bias) { + output += bias.view({1, bias.size(0), 1, 1}); + } + + return output; +} + +TORCH_LIBRARY_IMPL(mmdeploy, CUDA, m) { + m.impl("modulated_deform_conv", modulated_deform_conv_forward_cuda); +} +} // namespace mmdeploy diff --git a/csrc/backend_ops/torchscript/optimizer/CMakeLists.txt b/csrc/backend_ops/torchscript/optimizer/CMakeLists.txt new file mode 100644 index 0000000000..8f3cb46d71 --- /dev/null +++ b/csrc/backend_ops/torchscript/optimizer/CMakeLists.txt @@ -0,0 +1,17 @@ +# Copyright (c) OpenMMLab. All rights reserved. +cmake_minimum_required(VERSION 3.14) +project(ts_optimizer) + +find_package(Torch REQUIRED) +if (NOT TARGET pybind11) + add_subdirectory(${CMAKE_SOURCE_DIR}/third_party/pybind11 pybind11) +endif () + +file(GLOB_RECURSE OPTIMIZER_SRCS *.cpp) + +pybind11_add_module(${PROJECT_NAME} ${OPTIMIZER_SRCS}) +target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES}) +target_link_directories(${PROJECT_NAME} PRIVATE mmdeploy::torchscript_ops) +set_target_properties( + ${PROJECT_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY + ${CMAKE_SOURCE_DIR}/mmdeploy/backend/torchscript) diff --git a/csrc/backend_ops/torchscript/optimizer/bind.cpp b/csrc/backend_ops/torchscript/optimizer/bind.cpp new file mode 100644 index 0000000000..73594776a3 --- /dev/null +++ b/csrc/backend_ops/torchscript/optimizer/bind.cpp @@ -0,0 +1,26 @@ +// Copyright (c) OpenMMLab. All rights reserved. +#include + +#include + +#include "optimizer.h" + +void optimize_for_backend(torch::jit::Module& model, const std::string& ir = "torchscript", + const std::string& backend = "torchscript") { + if (ir == "torchscript") { + model = mmdeploy::optimize_for_torchscript(model); + } else if (ir == "onnx") { + model = mmdeploy::optimize_for_onnx(model); + } else { + fprintf(stderr, "No optimize for combination ir: %s backend: %s\n", ir.c_str(), + backend.c_str()); + exit(-1); + } +} + +PYBIND11_MODULE(ts_optimizer, m) { + namespace py = pybind11; + m.def("optimize_for_backend", optimize_for_backend, py::arg("module"), + py::arg("ir") = std::string("torchscript"), + py::arg("backend") = std::string("torchscript")); +} diff --git a/csrc/backend_ops/torchscript/optimizer/optimizer.cpp b/csrc/backend_ops/torchscript/optimizer/optimizer.cpp new file mode 100644 index 0000000000..05ef9d54cd --- /dev/null +++ b/csrc/backend_ops/torchscript/optimizer/optimizer.cpp @@ -0,0 +1,70 @@ +// Copyright (c) OpenMMLab. All rights reserved. 
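// Python-side sketch of driving the ts_optimizer binding from bind.cpp above
// (illustrative: the import path follows the LIBRARY_OUTPUT_DIRECTORY set in
// the optimizer CMakeLists, the model path is made up, and the exact
// ScriptModule handle pybind11 accepts may differ):
//   import torch
//   from mmdeploy.backend.torchscript import ts_optimizer
//   model = torch.jit.load('end2end.pt')
//   ts_optimizer.optimize_for_backend(model._c, ir='onnx', backend='torchscript')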
+#include "optimizer.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if TORCH_VERSION_MINOR >= 9 +#include +#include +#include +#endif + +namespace mmdeploy { + +using torch::jit::Graph; +const std::shared_ptr& required_passes(const std::shared_ptr& graph) { + RemoveExpands(graph); + CanonicalizeOps(graph); + EliminateDeadCode(graph); + return graph; +} + +Module optimize_for_torchscript(const Module& model) { + auto frozen_model = freeze_module(model); + auto graph = frozen_model.get_method("forward").graph(); + OptimizeFrozenGraph(graph, true); + +#if TORCH_VERSION_MINOR >= 9 + FuseFrozenConvAddRelu(graph); + ConvertFrozenOpsToMKLDNN(graph); + FrozenLinearTranspose(graph); +#endif + + graph = required_passes(graph); + EliminateCommonSubexpression(graph); + PeepholeOptimize(graph); + ConstantPropagation(graph); + ConstantPooling(graph); + + // TODO: add more custom passes + + return frozen_model; +} + +Module optimize_for_onnx(const Module& model) { + auto frozen_model = freeze_module(model, {"training"}); + auto graph = frozen_model.get_method("forward").graph(); + OptimizeFrozenGraph(graph, true); + +#if TORCH_VERSION_MINOR >= 9 + FuseFrozenConvAddRelu(graph); + ConvertFrozenOpsToMKLDNN(graph); + FrozenLinearTranspose(graph); +#endif + + // TODO: add more custom passes + + return frozen_model; +} + +// TODO: add optimizer for other backend/onnx + +} // namespace mmdeploy diff --git a/csrc/backend_ops/torchscript/optimizer/optimizer.h b/csrc/backend_ops/torchscript/optimizer/optimizer.h new file mode 100644 index 0000000000..d0d91c627d --- /dev/null +++ b/csrc/backend_ops/torchscript/optimizer/optimizer.h @@ -0,0 +1,10 @@ +// Copyright (c) OpenMMLab. All rights reserved. +#include + +namespace mmdeploy { +using torch::jit::script::Module; + +Module optimize_for_torchscript(const Module &model); + +Module optimize_for_onnx(const Module &model); +} // namespace mmdeploy diff --git a/csrc/codebase/CMakeLists.txt b/csrc/codebase/CMakeLists.txt index 023be7668d..9ef6490a8c 100644 --- a/csrc/codebase/CMakeLists.txt +++ b/csrc/codebase/CMakeLists.txt @@ -9,8 +9,8 @@ if ("all" IN_LIST MMDEPLOY_CODEBASES) list(APPEND CODEBASES "mmseg") list(APPEND CODEBASES "mmocr") list(APPEND CODEBASES "mmedit") -else() - set (CODEBASES ${MMDEPLOY_CODEBASES}) +else () + set(CODEBASES ${MMDEPLOY_CODEBASES}) endif () foreach (codebase IN LISTS CODEBASES) diff --git a/csrc/codebase/common.h b/csrc/codebase/common.h index b0b164ddb7..c815aa11f7 100644 --- a/csrc/codebase/common.h +++ b/csrc/codebase/common.h @@ -14,7 +14,7 @@ namespace mmdeploy { class Context { public: explicit Context(const Value& config) { - DEBUG("config: {}", config); + MMDEPLOY_DEBUG("config: {}", config); device_ = config["context"]["device"].get(); stream_ = config["context"]["stream"].get(); } @@ -35,17 +35,17 @@ class CodebaseCreator : public Creator { std::unique_ptr Create(const Value& cfg) override { constexpr auto key{"component"}; if (!cfg.contains(key)) { - ERROR("no key '{}' in config {}", key, cfg); + MMDEPLOY_ERROR("no key '{}' in config {}", key, cfg); throw_exception(eInvalidArgument); } if (!cfg[key].is_string()) { - ERROR("key '{}' is not a string", key); + MMDEPLOY_ERROR("key '{}' is not a string", key); throw_exception(eInvalidArgument); } auto postprocess_type = cfg[key].get(); auto creator = Registry::Get().GetCreator(postprocess_type); if (creator == nullptr) { - ERROR("could not found entry '{}' in {}", postprocess_type, Tag::name); + MMDEPLOY_ERROR("could not found 
entry '{}' in {}", postprocess_type, Tag::name); throw_exception(eEntryNotFound); } return creator->Create(cfg); diff --git a/csrc/codebase/mmcls/CMakeLists.txt b/csrc/codebase/mmcls/CMakeLists.txt index c2b254149c..259b653cd1 100644 --- a/csrc/codebase/mmcls/CMakeLists.txt +++ b/csrc/codebase/mmcls/CMakeLists.txt @@ -2,10 +2,8 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_mmcls) -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) file(GLOB_RECURSE SRCS ${CMAKE_CURRENT_SOURCE_DIR} "*.cpp") -build_target(${PROJECT_NAME} "${SRCS}") -target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy::core) +mmdeploy_add_module(${PROJECT_NAME} "${SRCS}") add_library(mmdeploy::mmcls ALIAS ${PROJECT_NAME}) -export_module(${PROJECT_NAME}) diff --git a/csrc/codebase/mmcls/linear_cls.cpp b/csrc/codebase/mmcls/linear_cls.cpp index 07704cd088..8b14f4e926 100644 --- a/csrc/codebase/mmcls/linear_cls.cpp +++ b/csrc/codebase/mmcls/linear_cls.cpp @@ -1,5 +1,6 @@ // Copyright (c) OpenMMLab. All rights reserved. +#include #include #include "codebase/mmcls/mmcls.h" @@ -18,19 +19,19 @@ class LinearClsHead : public MMClassification { if (cfg.contains("params")) { topk_ = cfg["params"].value("topk", 1); if (topk_ <= 0) { - ERROR("'topk' should be greater than 0, but got '{}'", topk_); + MMDEPLOY_ERROR("'topk' should be greater than 0, but got '{}'", topk_); throw_exception(eInvalidArgument); } } } Result operator()(const Value& infer_res) { - DEBUG("infer_res: {}", infer_res); + MMDEPLOY_DEBUG("infer_res: {}", infer_res); auto output = infer_res["output"].get(); if (!(output.shape().size() >= 2 && output.data_type() == DataType::kFLOAT)) { - ERROR("unsupported `output` tensor, shape: {}, dtype: {}", output.shape(), - (int)output.data_type()); + MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", output.shape(), + (int)output.data_type()); return Status(eNotSupported); } @@ -53,7 +54,7 @@ class LinearClsHead : public MMClassification { [&](int i, int j) { return scores_data[i] > scores_data[j]; }); for (int i = 0; i < topk_; ++i) { auto label = ClassifyOutput::Label{idx[i], scores_data[idx[i]]}; - DEBUG("label_id: {}, score: {}", label.label_id, label.score); + MMDEPLOY_DEBUG("label_id: {}, score: {}", label.label_id, label.score); output.labels.push_back(label); } return to_value(std::move(output)); diff --git a/csrc/codebase/mmcls/mmcls.cpp b/csrc/codebase/mmcls/mmcls.cpp index cd59907244..973a4c6d80 100644 --- a/csrc/codebase/mmcls/mmcls.cpp +++ b/csrc/codebase/mmcls/mmcls.cpp @@ -2,8 +2,12 @@ #include "codebase/mmcls/mmcls.h" -namespace mmdeploy::mmcls { +namespace mmdeploy { +namespace mmcls { REGISTER_CODEBASE(MMClassification); -} // namespace mmdeploy::mmcls +} + +MMDEPLOY_DEFINE_REGISTRY(mmcls::MMClassification); +} // namespace mmdeploy diff --git a/csrc/codebase/mmcls/mmcls.h b/csrc/codebase/mmcls/mmcls.h index 5cddfd197d..2b87b2d538 100644 --- a/csrc/codebase/mmcls/mmcls.h +++ b/csrc/codebase/mmcls/mmcls.h @@ -8,7 +8,8 @@ #include "core/module.h" #include "core/serialization.h" -namespace mmdeploy::mmcls { +namespace mmdeploy { +namespace mmcls { struct ClassifyOutput { struct Label { @@ -21,7 +22,9 @@ struct ClassifyOutput { }; DECLARE_CODEBASE(MMClassification, mmcls); +} // namespace mmcls -} // namespace mmdeploy::mmcls +MMDEPLOY_DECLARE_REGISTRY(mmcls::MMClassification); +} // namespace mmdeploy #endif // MMDEPLOY_SRC_CODEBASE_MMCLS_MMCLS_H_ diff --git a/csrc/codebase/mmdet/CMakeLists.txt b/csrc/codebase/mmdet/CMakeLists.txt index 
31a00813bb..75ae6c4d4b 100644 --- a/csrc/codebase/mmdet/CMakeLists.txt +++ b/csrc/codebase/mmdet/CMakeLists.txt @@ -3,10 +3,11 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_mmdet) include(${CMAKE_SOURCE_DIR}/cmake/opencv.cmake) -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) file(GLOB_RECURSE SRCS ${CMAKE_CURRENT_SOURCE_DIR} "*.cpp") -build_target(${PROJECT_NAME} "${SRCS}") -target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy::core opencv_core) +mmdeploy_add_module(${PROJECT_NAME} "${SRCS}") +target_link_libraries(${PROJECT_NAME} + PRIVATE mmdeploy_opencv_utils) + add_library(mmdeploy::mmdet ALIAS ${PROJECT_NAME}) -export_module(${PROJECT_NAME}) diff --git a/csrc/codebase/mmdet/instance_segmentation.cpp b/csrc/codebase/mmdet/instance_segmentation.cpp index 481e0b1e23..638ce0b80a 100644 --- a/csrc/codebase/mmdet/instance_segmentation.cpp +++ b/csrc/codebase/mmdet/instance_segmentation.cpp @@ -5,7 +5,7 @@ #include "experimental/module_adapter.h" #include "object_detection.h" #include "opencv2/imgproc/imgproc.hpp" -#include "preprocess/cpu/opencv_utils.h" +#include "opencv_utils.h" namespace mmdeploy::mmdet { @@ -19,35 +19,35 @@ class ResizeInstanceMask : public ResizeBBox { // TODO: remove duplication Result operator()(const Value& prep_res, const Value& infer_res) { - DEBUG("prep_res: {}\ninfer_res: {}", prep_res, infer_res); + MMDEPLOY_DEBUG("prep_res: {}\ninfer_res: {}", prep_res, infer_res); try { auto dets = infer_res["dets"].get(); auto labels = infer_res["labels"].get(); auto masks = infer_res["masks"].get(); - DEBUG("dets.shape: {}", dets.shape()); - DEBUG("labels.shape: {}", labels.shape()); - DEBUG("masks.shape: {}", masks.shape()); + MMDEPLOY_DEBUG("dets.shape: {}", dets.shape()); + MMDEPLOY_DEBUG("labels.shape: {}", labels.shape()); + MMDEPLOY_DEBUG("masks.shape: {}", masks.shape()); // `dets` is supposed to have 3 dims. 
They are 'batch', 'bboxes_number' // and 'channels' respectively if (!(dets.shape().size() == 3 && dets.data_type() == DataType::kFLOAT)) { - ERROR("unsupported `dets` tensor, shape: {}, dtype: {}", dets.shape(), - (int)dets.data_type()); + MMDEPLOY_ERROR("unsupported `dets` tensor, shape: {}, dtype: {}", dets.shape(), + (int)dets.data_type()); return Status(eNotSupported); } // `labels` is supposed to have 2 dims, which are 'batch' and // 'bboxes_number' if (labels.shape().size() != 2) { - ERROR("unsupported `labels`, tensor, shape: {}, dtype: {}", labels.shape(), - (int)labels.data_type()); + MMDEPLOY_ERROR("unsupported `labels`, tensor, shape: {}, dtype: {}", labels.shape(), + (int)labels.data_type()); return Status(eNotSupported); } if (!(masks.shape().size() == 4 && masks.data_type() == DataType::kFLOAT)) { - ERROR("unsupported `mask` tensor, shape: {}, dtype: {}", masks.shape(), - (int)masks.data_type()); + MMDEPLOY_ERROR("unsupported `mask` tensor, shape: {}, dtype: {}", masks.shape(), + (int)masks.data_type()); return Status(eNotSupported); } @@ -65,7 +65,7 @@ class ResizeInstanceMask : public ResizeBBox { return to_value(result); } catch (const std::exception& e) { - ERROR("{}", e.what()); + MMDEPLOY_ERROR("{}", e.what()); return Status(eFail); } } @@ -74,10 +74,10 @@ class ResizeInstanceMask : public ResizeBBox { void ProcessMasks(DetectorOutput& result, Tensor cpu_masks, int img_w, int img_h) const { auto shape = TensorShape{cpu_masks.shape(1), cpu_masks.shape(2), cpu_masks.shape(3)}; cpu_masks.Reshape(shape); - + MMDEPLOY_DEBUG("{}, {}", cpu_masks.shape(), cpu_masks.data_type()); for (auto& det : result.detections) { auto mask = cpu_masks.Slice(det.index); - cv::Mat mask_mat(mask.shape(1), mask.shape(2), CV_32F, mask.data()); + cv::Mat mask_mat((int)mask.shape(1), (int)mask.shape(2), CV_32F, mask.data()); cv::Mat warped_mask; auto& bbox = det.bbox; // same as mmdet with skip_empty = True @@ -97,7 +97,9 @@ class ResizeInstanceMask : public ResizeBBox { cv::warpAffine(mask_mat, warped_mask, m, cv::Size{width, height}, cv::INTER_LINEAR | cv::WARP_INVERSE_MAP); warped_mask = warped_mask > mask_thr_binary_; - det.mask = cpu::CVMat2Mat(warped_mask, PixelFormat::kGRAYSCALE); + + det.mask = Mat(height, width, PixelFormat::kGRAYSCALE, DataType::kINT8, + std::shared_ptr(warped_mask.data, [mat = warped_mask](void*) {})); } } diff --git a/csrc/codebase/mmdet/mmdet.cpp b/csrc/codebase/mmdet/mmdet.cpp index 218b73c502..45fe21f9b0 100644 --- a/csrc/codebase/mmdet/mmdet.cpp +++ b/csrc/codebase/mmdet/mmdet.cpp @@ -2,8 +2,12 @@ #include "codebase/mmdet/mmdet.h" -namespace mmdeploy::mmdet { +namespace mmdeploy { +namespace mmdet { REGISTER_CODEBASE(MMDetection); -} // namespace mmdeploy::mmdet +} + +MMDEPLOY_DEFINE_REGISTRY(mmdet::MMDetection); +} // namespace mmdeploy diff --git a/csrc/codebase/mmdet/mmdet.h b/csrc/codebase/mmdet/mmdet.h index a663004038..dcb1e18194 100644 --- a/csrc/codebase/mmdet/mmdet.h +++ b/csrc/codebase/mmdet/mmdet.h @@ -3,13 +3,17 @@ #ifndef MMDEPLOY_SRC_CODEBASE_MMDET_MMDET_H_ #define MMDEPLOY_SRC_CODEBASE_MMDET_MMDET_H_ +#include + #include "codebase/common.h" #include "core/device.h" #include "core/mat.h" #include "core/module.h" +#include "core/registry.h" #include "core/serialization.h" -namespace mmdeploy::mmdet { +namespace mmdeploy { +namespace mmdet { struct DetectorOutput { struct Detection { @@ -25,7 +29,9 @@ struct DetectorOutput { }; DECLARE_CODEBASE(MMDetection, mmdet); +} // namespace mmdet -} // namespace mmdeploy::mmdet 
+MMDEPLOY_DECLARE_REGISTRY(mmdet::MMDetection); +} // namespace mmdeploy #endif // MMDEPLOY_SRC_CODEBASE_MMDET_MMDET_H_ diff --git a/csrc/codebase/mmdet/object_detection.cpp b/csrc/codebase/mmdet/object_detection.cpp index 6a7c6d6a37..62a5c21017 100644 --- a/csrc/codebase/mmdet/object_detection.cpp +++ b/csrc/codebase/mmdet/object_detection.cpp @@ -17,26 +17,27 @@ ResizeBBox::ResizeBBox(const Value& cfg) : MMDetection(cfg) { } } Result ResizeBBox::operator()(const Value& prep_res, const Value& infer_res) { - DEBUG("prep_res: {}\ninfer_res: {}", prep_res, infer_res); + MMDEPLOY_DEBUG("prep_res: {}\ninfer_res: {}", prep_res, infer_res); try { auto dets = infer_res["dets"].get(); auto labels = infer_res["labels"].get(); - DEBUG("dets.shape: {}", dets.shape()); - DEBUG("labels.shape: {}", labels.shape()); + MMDEPLOY_DEBUG("dets.shape: {}", dets.shape()); + MMDEPLOY_DEBUG("labels.shape: {}", labels.shape()); // `dets` is supposed to have 3 dims. They are 'batch', 'bboxes_number' // and 'channels' respectively if (!(dets.shape().size() == 3 && dets.data_type() == DataType::kFLOAT)) { - ERROR("unsupported `dets` tensor, shape: {}, dtype: {}", dets.shape(), (int)dets.data_type()); + MMDEPLOY_ERROR("unsupported `dets` tensor, shape: {}, dtype: {}", dets.shape(), + (int)dets.data_type()); return Status(eNotSupported); } // `labels` is supposed to have 2 dims, which are 'batch' and // 'bboxes_number' if (labels.shape().size() != 2) { - ERROR("unsupported `labels`, tensor, shape: {}, dtype: {}", labels.shape(), - (int)labels.data_type()); + MMDEPLOY_ERROR("unsupported `labels`, tensor, shape: {}, dtype: {}", labels.shape(), + (int)labels.data_type()); return Status(eNotSupported); } @@ -98,16 +99,17 @@ Result ResizeBBox::GetBBoxes(const Value& prep_res, const Tensor auto right = dets_ptr[2]; auto bottom = dets_ptr[3]; - DEBUG("ori left {}, top {}, right {}, bottom {}, label {}", left, top, right, bottom, - *labels_ptr); + MMDEPLOY_DEBUG("ori left {}, top {}, right {}, bottom {}, label {}", left, top, right, bottom, + *labels_ptr); auto rect = MapToOriginImage(left, top, right, bottom, scale_factor.data(), w_offset, h_offset, ori_width, ori_height); if (rect[2] - rect[0] < min_bbox_size_ || rect[3] - rect[1] < min_bbox_size_) { - DEBUG("ignore small bbox with width '{}' and height '{}", rect[2] - rect[0], - rect[3] - rect[1]); + MMDEPLOY_DEBUG("ignore small bbox with width '{}' and height '{}", rect[2] - rect[0], + rect[3] - rect[1]); continue; } - DEBUG("remap left {}, top {}, right {}, bottom {}", rect[0], rect[1], rect[2], rect[3]); + MMDEPLOY_DEBUG("remap left {}, top {}, right {}, bottom {}", rect[0], rect[1], rect[2], + rect[3]); DetectorOutput::Detection det{}; det.index = i; det.label_id = static_cast(*labels_ptr); diff --git a/csrc/codebase/mmedit/CMakeLists.txt b/csrc/codebase/mmedit/CMakeLists.txt index a546642551..59646d0f6a 100644 --- a/csrc/codebase/mmedit/CMakeLists.txt +++ b/csrc/codebase/mmedit/CMakeLists.txt @@ -3,10 +3,9 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_mmedit) include(${CMAKE_SOURCE_DIR}/cmake/opencv.cmake) -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) file(GLOB_RECURSE SRCS ${CMAKE_CURRENT_SOURCE_DIR} "*.cpp") -build_target(${PROJECT_NAME} "${SRCS}") -target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy::core opencv_core) +mmdeploy_add_module(${PROJECT_NAME} "${SRCS}") +target_link_libraries(${PROJECT_NAME} PRIVATE opencv_core) add_library(mmdeploy::mmedit ALIAS ${PROJECT_NAME}) 
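In the object_detection.cpp hunk above, GetBBoxes maps each box from the network's input space back to the original image before the min_bbox_size filter runs. A hedged sketch of what MapToOriginImage plausibly does; the signature is inferred from the call site, and the clamping details may differ from the real helper:

    #include <algorithm>
    #include <array>

    std::array<float, 4> MapToOriginImage(float left, float top, float right, float bottom,
                                          const float* scale_factor, float w_offset,
                                          float h_offset, int ori_w, int ori_h) {
      // undo the padding offset, divide out the resize scale, clamp to the image
      auto clampf = [](float v, float lo, float hi) { return std::max(lo, std::min(v, hi)); };
      return {clampf((left - w_offset) / scale_factor[0], 0.f, ori_w - 1.f),
              clampf((top - h_offset) / scale_factor[1], 0.f, ori_h - 1.f),
              clampf((right - w_offset) / scale_factor[2], 0.f, ori_w - 1.f),
              clampf((bottom - h_offset) / scale_factor[3], 0.f, ori_h - 1.f)};
    }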
-export_module(${PROJECT_NAME}) diff --git a/csrc/codebase/mmedit/mmedit.cpp b/csrc/codebase/mmedit/mmedit.cpp index 28a8dfa1d2..b10c084b24 100644 --- a/csrc/codebase/mmedit/mmedit.cpp +++ b/csrc/codebase/mmedit/mmedit.cpp @@ -4,8 +4,12 @@ #include "core/registry.h" -namespace mmdeploy::mmedit { +namespace mmdeploy { +namespace mmedit { REGISTER_CODEBASE(MMEdit); -} // namespace mmdeploy::mmedit +} // namespace mmedit + +MMDEPLOY_DEFINE_REGISTRY(mmedit::MMEdit); +} // namespace mmdeploy diff --git a/csrc/codebase/mmedit/mmedit.h b/csrc/codebase/mmedit/mmedit.h index ed9c3cdc88..e7c4285134 100644 --- a/csrc/codebase/mmedit/mmedit.h +++ b/csrc/codebase/mmedit/mmedit.h @@ -9,12 +9,16 @@ #include "core/module.h" #include "core/serialization.h" -namespace mmdeploy::mmedit { +namespace mmdeploy { +namespace mmedit { using RestorerOutput = Mat; DECLARE_CODEBASE(MMEdit, mmedit); -} // namespace mmdeploy::mmedit +} // namespace mmedit + +MMDEPLOY_DECLARE_REGISTRY(mmedit::MMEdit); +} // namespace mmdeploy #endif // MMDEPLOY_SRC_CODEBASE_MMEDIT_MMEDIT_H_ diff --git a/csrc/codebase/mmedit/restorer.cpp b/csrc/codebase/mmedit/restorer.cpp index da06075a4d..84626d15be 100644 --- a/csrc/codebase/mmedit/restorer.cpp +++ b/csrc/codebase/mmedit/restorer.cpp @@ -32,8 +32,8 @@ class TensorToImg : public MMEdit { mat_hwc.convertTo(rescale_uint8, CV_8UC(channels), 255.f); return mat; } else { - ERROR("unsupported `output` tensor, shape: {}, dtype: {}", upscale.shape(), - (int)upscale.data_type()); + MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", upscale.shape(), + (int)upscale.data_type()); return Status(eNotSupported); } } diff --git a/csrc/codebase/mmocr/CMakeLists.txt b/csrc/codebase/mmocr/CMakeLists.txt index 42e63a0dff..60ac5c6ff4 100644 --- a/csrc/codebase/mmocr/CMakeLists.txt +++ b/csrc/codebase/mmocr/CMakeLists.txt @@ -3,13 +3,13 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_mmocr) include(${CMAKE_SOURCE_DIR}/cmake/opencv.cmake) -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} OCR_SRCS) aux_source_directory(${CMAKE_SOURCE_DIR}/third_party/clipper CLIPPER_SRCS) set(SRCS ${OCR_SRCS} ${CLIPPER_SRCS}) -build_target(${PROJECT_NAME} "${SRCS}") +mmdeploy_add_module(${PROJECT_NAME} "${SRCS}") target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/third_party/clipper) -target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy::core opencv_core opencv_imgproc) +target_link_libraries(${PROJECT_NAME} + PRIVATE mmdeploy::transform mmdeploy_opencv_utils) add_library(mmdeploy::mmocr ALIAS ${PROJECT_NAME}) -export_module(${PROJECT_NAME}) diff --git a/csrc/codebase/mmocr/crnn.cpp b/csrc/codebase/mmocr/crnn.cpp index bd6c4a6179..fc611e8588 100644 --- a/csrc/codebase/mmocr/crnn.cpp +++ b/csrc/codebase/mmocr/crnn.cpp @@ -1,5 +1,6 @@ // Copyright (c) OpenMMLab. All rights reserved. 
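The DEBUG/ERROR to MMDEPLOY_DEBUG/MMDEPLOY_ERROR renames running through these hunks are MSVC-motivated: Windows headers claim the unprefixed names (wingdi.h defines ERROR as a plain macro), so unprefixed logging macros cannot coexist with <windows.h>. A small illustration of the clash, with a stand-in for the Windows definition:

    #include <cstdio>

    #define ERROR 0  // stand-in for the definition <windows.h> drags in via wingdi.h

    // A function-like logging macro named ERROR can no longer be defined here without
    // a redefinition error, and a call written as ERROR("msg") would otherwise expand
    // to 0("msg") and fail to compile. A project-prefixed name sidesteps the collision:
    #define MMDEPLOY_ERROR(msg) std::fprintf(stderr, "%s\n", msg)

    int main() { MMDEPLOY_ERROR("unsupported `output` tensor"); }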
+#include #include #include "core/device.h" @@ -22,7 +23,7 @@ class CTCConvertor : public MMOCR { explicit CTCConvertor(const Value& cfg) : MMOCR(cfg) { auto model = cfg["context"]["model"].get(); if (!cfg.contains("params")) { - ERROR("'params' is required, but it's not in the config"); + MMDEPLOY_ERROR("'params' is required, but it's not in the config"); throw_exception(eInvalidArgument); } // BaseConverter @@ -40,11 +41,11 @@ class CTCConvertor : public MMOCR { } else if (dict_type == "DICT90") { idx2char_ = SplitChars(DICT90); } else { - ERROR("unknown dict_type: {}", dict_type); + MMDEPLOY_ERROR("unknown dict_type: {}", dict_type); throw_exception(eInvalidArgument); } } else { - ERROR("either dict_file, dict_list or dict_type must be specified"); + MMDEPLOY_ERROR("either dict_file, dict_list or dict_type must be specified"); throw_exception(eInvalidArgument); } // CTCConverter @@ -62,8 +63,8 @@ class CTCConvertor : public MMOCR { auto d_conf = _prob["output"].get(); if (!(d_conf.shape().size() == 3 && d_conf.data_type() == DataType::kFLOAT)) { - ERROR("unsupported `output` tensor, shape: {}, dtype: {}", d_conf.shape(), - (int)d_conf.data_type()); + MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", d_conf.shape(), + (int)d_conf.data_type()); return Status(eNotSupported); } @@ -80,7 +81,7 @@ class CTCConvertor : public MMOCR { auto [indexes, scores] = Tensor2Idx(data, w, c, valid_ratio); auto text = Idx2Str(indexes); - DEBUG("text: {}", text); + MMDEPLOY_DEBUG("text: {}", text); TextRecognizerOutput output{text, scores}; diff --git a/csrc/codebase/mmocr/dbnet.cpp b/csrc/codebase/mmocr/dbnet.cpp index 93a3d0400b..bd90dca3f3 100644 --- a/csrc/codebase/mmocr/dbnet.cpp +++ b/csrc/codebase/mmocr/dbnet.cpp @@ -13,7 +13,7 @@ #include "core/value.h" #include "experimental/module_adapter.h" #include "mmocr.h" -#include "preprocess/cpu/opencv_utils.h" +#include "opencv_utils.h" namespace mmdeploy::mmocr { @@ -37,21 +37,21 @@ class DBHead : public MMOCR { } Result operator()(const Value& _data, const Value& _prob) { - DEBUG("preprocess_result: {}", _data); - DEBUG("inference_result: {}", _prob); + MMDEPLOY_DEBUG("preprocess_result: {}", _data); + MMDEPLOY_DEBUG("inference_result: {}", _prob); auto img = _data["img"].get(); - DEBUG("img shape: {}", img.shape()); + MMDEPLOY_DEBUG("img shape: {}", img.shape()); Device cpu_device{"cpu"}; OUTCOME_TRY(auto conf, MakeAvailableOnDevice(_prob["output"].get(), cpu_device, stream_)); OUTCOME_TRY(stream_.Wait()); - DEBUG("shape: {}", conf.shape()); + MMDEPLOY_DEBUG("shape: {}", conf.shape()); if (!(conf.shape().size() == 4 && conf.data_type() == DataType::kFLOAT)) { - ERROR("unsupported `output` tensor, shape: {}, dtype: {}", conf.shape(), - (int)conf.data_type()); + MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", conf.shape(), + (int)conf.data_type()); return Status(eNotSupported); } @@ -103,7 +103,7 @@ class DBHead : public MMOCR { } else { assert(0); } - DEBUG("score: {}", score); + MMDEPLOY_DEBUG("score: {}", score); // cv::drawContours(score_map, vector>{approx}, -1, 1); vector scaled(begin(approx), end(approx)); @@ -133,7 +133,7 @@ class DBHead : public MMOCR { cv::Mat mask(rect.size(), CV_8U, cv::Scalar(0)); - cv::fillPoly(mask, std::vector{box}, 1, cv::LINE_8, 0, -rect.tl()); + cv::fillPoly(mask, std::vector>{box}, 1, cv::LINE_8, 0, -rect.tl()); auto mean = cv::mean(bitmap(rect), mask)[0]; return static_cast(mean); } diff --git a/csrc/codebase/mmocr/mmocr.cpp b/csrc/codebase/mmocr/mmocr.cpp index 
2935f03b5a..f34f918afa 100644 --- a/csrc/codebase/mmocr/mmocr.cpp +++ b/csrc/codebase/mmocr/mmocr.cpp @@ -5,8 +5,12 @@ #include "core/registry.h" #include "core/utils/formatter.h" -namespace mmdeploy::mmocr { +namespace mmdeploy { +namespace mmocr { REGISTER_CODEBASE(MMOCR); -} // namespace mmdeploy::mmocr +} // namespace mmocr + +MMDEPLOY_DEFINE_REGISTRY(mmocr::MMOCR); +} // namespace mmdeploy diff --git a/csrc/codebase/mmocr/mmocr.h b/csrc/codebase/mmocr/mmocr.h index 42098af746..1871b6755c 100644 --- a/csrc/codebase/mmocr/mmocr.h +++ b/csrc/codebase/mmocr/mmocr.h @@ -7,7 +7,8 @@ #include "core/device.h" #include "core/module.h" -namespace mmdeploy::mmocr { +namespace mmdeploy { +namespace mmocr { struct TextDetectorOutput { std::vector> boxes; @@ -23,6 +24,9 @@ struct TextRecognizerOutput { DECLARE_CODEBASE(MMOCR, mmocr); -} // namespace mmdeploy::mmocr +} // namespace mmocr + +MMDEPLOY_DECLARE_REGISTRY(mmocr::MMOCR); +} // namespace mmdeploy #endif // MMDEPLOY_MMOCR_H diff --git a/csrc/codebase/mmocr/resize_ocr.cpp b/csrc/codebase/mmocr/resize_ocr.cpp index 2da9bac7a7..69d7602d18 100644 --- a/csrc/codebase/mmocr/resize_ocr.cpp +++ b/csrc/codebase/mmocr/resize_ocr.cpp @@ -4,12 +4,14 @@ #include "archive/json_archive.h" #include "archive/value_archive.h" +#include "core/registry.h" #include "core/tensor.h" #include "core/utils/device_utils.h" #include "core/utils/formatter.h" #include "opencv2/imgproc.hpp" -#include "preprocess/cpu/opencv_utils.h" +#include "opencv_utils.h" #include "preprocess/transform/resize.h" +#include "preprocess/transform/transform.h" using namespace std; @@ -37,7 +39,7 @@ class ResizeOCRImpl : public Module { ~ResizeOCRImpl() override = default; Result Process(const Value& input) override { - DEBUG("input: {}", input); + MMDEPLOY_DEBUG("input: {}", input); auto dst_height = height_; auto dst_min_width = min_width_; auto dst_max_width = max_width_; @@ -84,7 +86,7 @@ class ResizeOCRImpl : public Module { output["resize_shape"] = to_value(img_resize.desc().shape); output["pad_shape"] = output["resize_shape"]; output["valid_ratio"] = valid_ratio; - DEBUG("output: {}", to_json(output).dump(2)); + MMDEPLOY_DEBUG("output: {}", to_json(output).dump(2)); return output; } @@ -95,7 +97,7 @@ class ResizeOCRImpl : public Module { int h = desc.shape[1]; int w = desc.shape[2]; int c = desc.shape[3]; - assert(c == 3 or c == 1); + assert(c == 3 || c == 1); cv::Mat src_mat, dst_mat; if (3 == c) { // rgb src_mat = cv::Mat(h, w, CV_8UC3, const_cast(img.data())); @@ -135,6 +137,8 @@ class ResizeOCRImplCreator : public Creator { ReturnType Create(const Value& args) override { return std::make_unique(args); } }; +MMDEPLOY_DEFINE_REGISTRY(ResizeOCRImpl); + REGISTER_MODULE(ResizeOCRImpl, ResizeOCRImplCreator); class ResizeOCR : public Transform { diff --git a/csrc/codebase/mmocr/warp.cpp b/csrc/codebase/mmocr/warp.cpp index b54e6a7897..56566f6d12 100644 --- a/csrc/codebase/mmocr/warp.cpp +++ b/csrc/codebase/mmocr/warp.cpp @@ -8,7 +8,7 @@ #include "core/utils/formatter.h" #include "core/value.h" #include "experimental/module_adapter.h" -#include "preprocess/cpu/opencv_utils.h" +#include "opencv_utils.h" namespace mmdeploy { diff --git a/csrc/codebase/mmseg/CMakeLists.txt b/csrc/codebase/mmseg/CMakeLists.txt index 55bb7e9d08..89a15bb32f 100644 --- a/csrc/codebase/mmseg/CMakeLists.txt +++ b/csrc/codebase/mmseg/CMakeLists.txt @@ -3,10 +3,9 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_mmseg) include(${CMAKE_SOURCE_DIR}/cmake/opencv.cmake) 
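The assert change in resize_ocr.cpp above (`or` to `||`) is another MSVC portability fix: `or` is a standard alternative token, but MSVC's default mode only honors it under /permissive- or after including <ciso646>/<iso646.h>. The operator spelling compiles everywhere:

    #include <cassert>

    void check_channels(int c) {
      // assert(c == 3 or c == 1);  // rejected by default-mode MSVC
      assert(c == 3 || c == 1);     // accepted by every major compiler
    }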
-include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) file(GLOB_RECURSE SRCS ${CMAKE_CURRENT_SOURCE_DIR} "*.cpp") -build_target(${PROJECT_NAME} "${SRCS}") -target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy::core opencv_core) +mmdeploy_add_module(${PROJECT_NAME} "${SRCS}") +target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy_opencv_utils) add_library(mmdeploy::mmseg ALIAS ${PROJECT_NAME}) -export_module(${PROJECT_NAME}) diff --git a/csrc/codebase/mmseg/mmseg.cpp b/csrc/codebase/mmseg/mmseg.cpp index 78e1d490e5..6f080b8221 100644 --- a/csrc/codebase/mmseg/mmseg.cpp +++ b/csrc/codebase/mmseg/mmseg.cpp @@ -4,8 +4,12 @@ using namespace std; -namespace mmdeploy::mmseg { +namespace mmdeploy { +namespace mmseg { REGISTER_CODEBASE(MMSegmentation); -} // namespace mmdeploy::mmseg +} + +MMDEPLOY_DEFINE_REGISTRY(mmseg::MMSegmentation); +} // namespace mmdeploy diff --git a/csrc/codebase/mmseg/mmseg.h b/csrc/codebase/mmseg/mmseg.h index 3685c35bf2..9122047836 100644 --- a/csrc/codebase/mmseg/mmseg.h +++ b/csrc/codebase/mmseg/mmseg.h @@ -8,7 +8,8 @@ #include "core/module.h" #include "core/tensor.h" -namespace mmdeploy::mmseg { +namespace mmdeploy { +namespace mmseg { struct SegmentorOutput { Tensor mask; @@ -20,6 +21,9 @@ struct SegmentorOutput { DECLARE_CODEBASE(MMSegmentation, mmseg); -} // namespace mmdeploy::mmseg +} // namespace mmseg + +MMDEPLOY_DECLARE_REGISTRY(mmseg::MMSegmentation); +} // namespace mmdeploy #endif // MMDEPLOY_MMSEG_H diff --git a/csrc/codebase/mmseg/segment.cpp b/csrc/codebase/mmseg/segment.cpp index 48afa9b572..8d5aeef08e 100644 --- a/csrc/codebase/mmseg/segment.cpp +++ b/csrc/codebase/mmseg/segment.cpp @@ -4,7 +4,7 @@ #include "core/tensor.h" #include "core/utils/device_utils.h" #include "core/utils/formatter.h" -#include "preprocess/cpu/opencv_utils.h" +#include "opencv_utils.h" #include "preprocess/transform/transform.h" namespace mmdeploy::mmseg { @@ -15,19 +15,19 @@ class ResizeMask : public MMSegmentation { try { classes_ = cfg["params"]["num_classes"].get(); } catch (const std::exception &e) { - ERROR("no ['params']['num_classes'] is specified in cfg: {}", cfg); + MMDEPLOY_ERROR("no ['params']['num_classes'] is specified in cfg: {}", cfg); throw_exception(eInvalidArgument); } } Result operator()(const Value &preprocess_result, const Value &inference_result) { - DEBUG("preprocess: {}\ninference: {}", preprocess_result, inference_result); + MMDEPLOY_DEBUG("preprocess: {}\ninference: {}", preprocess_result, inference_result); auto mask = inference_result["output"].get(); - DEBUG("tensor.name: {}, tensor.shape: {}, tensor.data_type: {}", mask.name(), mask.shape(), - mask.data_type()); + MMDEPLOY_DEBUG("tensor.name: {}, tensor.shape: {}, tensor.data_type: {}", mask.name(), + mask.shape(), mask.data_type()); if (!(mask.shape().size() == 4 && mask.shape(0) == 1 && mask.shape(1) == 1)) { - ERROR("unsupported `output` tensor, shape: {}", mask.shape()); + MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}", mask.shape()); return Status(eNotSupported); } @@ -40,16 +40,14 @@ class ResizeMask : public MMSegmentation { OUTCOME_TRY(stream_.Wait()); if (mask.data_type() == DataType::kINT64) { // change kINT64 to 2 INT32 - TensorDesc desc{.device = host_tensor.device(), - .data_type = DataType::kINT32, - .shape = {1, 2, height, width}, - .name = host_tensor.name()}; + TensorDesc desc{ + host_tensor.device(), DataType::kINT32, {1, 2, height, width}, host_tensor.name()}; Tensor _host_tensor(desc, mask.buffer()); return 
MaskResize(_host_tensor, input_height, input_width); } else if (mask.data_type() == DataType::kINT32) { return MaskResize(host_tensor, input_height, input_width); } else { - ERROR("unsupported `output` tensor, dtype: {}", (int)mask.data_type()); + MMDEPLOY_ERROR("unsupported `output` tensor, dtype: {}", (int)mask.data_type()); return Status(eNotSupported); } } diff --git a/csrc/core/CMakeLists.txt b/csrc/core/CMakeLists.txt index ef539a05b2..19b9a64a97 100644 --- a/csrc/core/CMakeLists.txt +++ b/csrc/core/CMakeLists.txt @@ -11,15 +11,14 @@ reliably on all generators, or if a new generator is added in the future that ca projects using it will be stuck. Even if CONFIGURE_DEPENDS works reliably, there is still a cost to perform the check on every rebuild. #]==] -# file(GLOB_RECURSE CORE_SRCS *.cpp) set(SPDLOG_LIB) find_package(spdlog QUIET) if (spdlog_FOUND) message(STATUS "spdlog is found") - set(SPDLOG_LIB $) + set(SPDLOG_LIB spdlog::spdlog) endif () -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) set(SRCS device_impl.cpp @@ -32,11 +31,14 @@ set(SRCS operator.cpp status_code.cpp tensor.cpp + registry.cpp utils/device_utils.cpp utils/formatter.cpp utils/stacktrace.cpp) -build_target(${PROJECT_NAME} "${SRCS}") + +mmdeploy_add_library(${PROJECT_NAME} ${SRCS}) target_compile_definitions(${PROJECT_NAME} PUBLIC -DMMDEPLOY_STATUS_USE_SOURCE_LOCATION=1) + target_include_directories(${PROJECT_NAME} PUBLIC $ @@ -45,7 +47,10 @@ target_include_directories(${PROJECT_NAME} $ $ ) -target_link_libraries(${PROJECT_NAME} PUBLIC ${SPDLOG_LIB} stdc++fs) +target_link_libraries(${PROJECT_NAME} PUBLIC ${SPDLOG_LIB}) +if (NOT MSVC) + target_link_libraries(${PROJECT_NAME} PUBLIC stdc++fs) +endif () add_library(mmdeploy::core ALIAS ${PROJECT_NAME}) install(DIRECTORY ${CMAKE_SOURCE_DIR}/csrc/core @@ -57,5 +62,3 @@ install(FILES ${CMAKE_SOURCE_DIR}/third_party/outcome/outcome-experimental.hpp install(DIRECTORY ${CMAKE_SOURCE_DIR}/csrc/experimental DESTINATION include/cpp FILES_MATCHING PATTERN "*.h") - -export_target(${PROJECT_NAME}) diff --git a/csrc/core/device.h b/csrc/core/device.h index 6695ca3807..f337ad4dcd 100644 --- a/csrc/core/device.h +++ b/csrc/core/device.h @@ -10,6 +10,7 @@ #include #include +#include "core/macro.h" #include "core/status_code.h" namespace mmdeploy { @@ -67,7 +68,7 @@ class Device { constexpr explicit Device(int platform_id, int device_id = 0) : platform_id_(platform_id), device_id_(device_id) {} - explicit Device(const char *platform_name, int device_id = 0); + MMDEPLOY_API explicit Device(const char *platform_name, int device_id = 0); constexpr int device_id() const noexcept { return device_id_; } @@ -100,7 +101,7 @@ class Device { enum class MemcpyKind : int { HtoD, DtoH, DtoD }; -class Platform { +class MMDEPLOY_API Platform { public: // throws if not found explicit Platform(const char *platform_name); @@ -133,7 +134,7 @@ Platform GetPlatform(int platform_id); Platform GetPlatform(const char *platform_name); -class Stream { +class MMDEPLOY_API Stream { public: Stream() = default; @@ -187,7 +188,7 @@ T GetNative(Stream &stream, ErrorCode *ec = nullptr) { return reinterpret_cast(stream.GetNative(ec)); } -class Event { +class MMDEPLOY_API Event { public: Event() = default; @@ -226,7 +227,7 @@ T GetNative(Event &event, ErrorCode *ec = nullptr) { return reinterpret_cast(event.GetNative(ec)); } -class Kernel { +class MMDEPLOY_API Kernel { public: Kernel() = default; explicit Kernel(std::shared_ptr impl) : impl_(std::move(impl)) {} @@ 
-246,7 +247,7 @@ T GetNative(Kernel &kernel, ErrorCode *ec = nullptr) { return reinterpret_cast(kernel.GetNative(ec)); } -class Allocator { +class MMDEPLOY_API Allocator { friend class Access; public: @@ -259,7 +260,7 @@ class Allocator { std::shared_ptr impl_; }; -class Buffer { +class MMDEPLOY_API Buffer { public: Buffer() = default; @@ -304,7 +305,7 @@ T GetNative(const Buffer &buffer, ErrorCode *ec = nullptr) { return reinterpret_cast(buffer.GetNative(ec)); } -class PlatformRegistry { +class MMDEPLOY_API PlatformRegistry { public: using Creator = std::function()>; @@ -332,6 +333,6 @@ class PlatformRegistry { std::vector entries_; }; -PlatformRegistry &gPlatformRegistry(); +MMDEPLOY_API PlatformRegistry &gPlatformRegistry(); } // namespace mmdeploy diff --git a/csrc/core/device_impl.cpp b/csrc/core/device_impl.cpp index 04b772314e..32ed9e104f 100644 --- a/csrc/core/device_impl.cpp +++ b/csrc/core/device_impl.cpp @@ -144,7 +144,7 @@ Stream::Stream(Device device, uint64_t flags) { r.error().throw_exception(); } } else { - ERROR("{}, {}", device.device_id(), device.platform_id()); + MMDEPLOY_ERROR("{}, {}", device.device_id(), device.platform_id()); throw_exception(eInvalidArgument); } } diff --git a/csrc/core/graph.cpp b/csrc/core/graph.cpp index a824630059..524542928e 100644 --- a/csrc/core/graph.cpp +++ b/csrc/core/graph.cpp @@ -3,8 +3,10 @@ #include "core/graph.h" #include "archive/value_archive.h" +#include "core/registry.h" -namespace mmdeploy::graph { +namespace mmdeploy { +namespace graph { TaskGraph::Handle* TaskGraph::Add(TaskFunction fn) { function_.push_back(std::move(fn)); @@ -14,7 +16,8 @@ TaskGraph::Handle* TaskGraph::Add(TaskFunction fn) { TaskGraph::~TaskGraph() { for (int i = 0; i < time_.size(); ++i) { - INFO("node {} ({}): {} ms", i, handle_[i]->name(), static_cast(time_[i]) / count_); + MMDEPLOY_INFO("node {} ({}): {} ms", i, handle_[i]->name(), + static_cast(time_[i]) / count_); } } @@ -75,4 +78,8 @@ std::vector> Context::Execute(Span()>> return graph_->Execute(tasks); } -} // namespace mmdeploy::graph +} // namespace graph + +MMDEPLOY_DEFINE_REGISTRY(graph::Node); + +} // namespace mmdeploy diff --git a/csrc/core/graph.h b/csrc/core/graph.h index 37c1aa189f..d55afe70fa 100644 --- a/csrc/core/graph.h +++ b/csrc/core/graph.h @@ -14,7 +14,9 @@ #include "taskflow/taskflow.hpp" #endif -namespace mmdeploy::graph { +namespace mmdeploy { + +namespace graph { using std::pair; using std::string; @@ -24,7 +26,7 @@ using std::vector; class TaskGraph; class Node; -class Context { +class MMDEPLOY_API Context { public: explicit Context(TaskGraph* graph) : graph_(graph) {} @@ -48,7 +50,7 @@ class Context { TaskGraph* graph_; }; -class TaskGraph { +class MMDEPLOY_API TaskGraph { friend class Context; public: @@ -65,6 +67,10 @@ class TaskGraph { ~TaskGraph(); + TaskGraph() = default; + TaskGraph(const TaskGraph&) = delete; + TaskGraph& operator=(const TaskGraph&) = delete; + Handle* Add(TaskFunction fn); Result Run(Value inputs); @@ -82,7 +88,7 @@ class TaskGraph { int64_t count_{}; }; -class Node { +class MMDEPLOY_API Node { public: virtual ~Node() = default; virtual void Build(TaskGraph& graph) = 0; @@ -96,6 +102,10 @@ class Node { std::vector outputs_; }; -} // namespace mmdeploy::graph +} // namespace graph + +MMDEPLOY_DECLARE_REGISTRY(graph::Node); + +} // namespace mmdeploy #endif // MMDEPLOY_SRC_EXPERIMENTAL_PIPELINE_IR_H_ diff --git a/csrc/core/logger.cpp b/csrc/core/logger.cpp index b858ce785c..a1499cc072 100644 --- a/csrc/core/logger.cpp +++ b/csrc/core/logger.cpp @@ -9,6 
+9,9 @@ #include #else #include +#if defined(_MSC_VER) +#include +#endif #endif #endif diff --git a/csrc/core/logger.h b/csrc/core/logger.h index 4f7c2ebb84..ff326c4511 100644 --- a/csrc/core/logger.h +++ b/csrc/core/logger.h @@ -5,11 +5,13 @@ #include +#include "core/macro.h" + namespace mmdeploy { -spdlog::logger *GetLogger(); +MMDEPLOY_API spdlog::logger *GetLogger(); -void SetLogger(spdlog::logger *logger); +MMDEPLOY_API void SetLogger(spdlog::logger *logger); } // namespace mmdeploy @@ -86,18 +88,4 @@ void SetLogger(spdlog::logger *logger); #define MMDEPLOY_CRITICAL(...) (void)0; #endif -#undef CRITICAL -#undef ERROR -#undef WARN -#undef INFO -#undef DEBUG -#undef TRACE - -#define CRITICAL MMDEPLOY_CRITICAL -#define ERROR MMDEPLOY_ERROR -#define WARN MMDEPLOY_WARN -#define INFO MMDEPLOY_INFO -#define DEBUG MMDEPLOY_DEBUG -#define TRACE MMDEPLOY_TRACE - #endif // !CORE_LOG_H diff --git a/csrc/core/macro.h b/csrc/core/macro.h index f9822094e9..6f52f0b5fa 100644 --- a/csrc/core/macro.h +++ b/csrc/core/macro.h @@ -3,34 +3,119 @@ #ifndef MMDEPLOY_SRC_CORE_MARCO_H_ #define MMDEPLOY_SRC_CORE_MARCO_H_ +#ifndef MMDEPLOY_EXPORT #ifdef _MSC_VER -#ifdef SDK_EXPORTS -#define MM_SDK_API __declspec(dllexport) +#define MMDEPLOY_EXPORT __declspec(dllexport) #else -#define MM_SDK_API -#endif -#else /* _MSC_VER */ -#ifdef SDK_EXPORTS -#define MM_SDK_API __attribute__((visibility("default"))) -#else -#define MM_SDK_API +#define MMDEPLOY_EXPORT __attribute__((visibility("default"))) #endif #endif -#ifdef __cplusplus -#define CV_SDK_API extern "C" MM_SDK_API +#ifndef MMDEPLOY_API +#ifdef MMDEPLOY_API_EXPORTS +#define MMDEPLOY_API MMDEPLOY_EXPORT #else -#define CV_SDK_API MM_SDK_API +#define MMDEPLOY_API +#endif #endif -#define MMDEPLOY_CONCATENATE_IMPL(s1, s2) s1##s2 -#define MMDEPLOY_CONCATENATE(s1, s2) MMDEPLOY_CONCATENATE_IMPL(s1, s2) +#define _MMDEPLOY_PP_CONCAT_IMPL(s1, s2) s1##s2 +#define MMDEPLOY_PP_CONCAT(s1, s2) _MMDEPLOY_PP_CONCAT_IMPL(s1, s2) // ! Be aware of ODR violation when using __COUNTER__ #ifdef __COUNTER__ -#define MMDEPLOY_ANONYMOUS_VARIABLE(str) MMDEPLOY_CONCATENATE(str, __COUNTER__) +#define MMDEPLOY_ANONYMOUS_VARIABLE(str) MMDEPLOY_PP_CONCAT(str, __COUNTER__) #else -#define MMDEPLOY_ANONYMOUS_VARIABLE(str) MMDEPLOY_CONCATENATE(str, __LINE__) +#define MMDEPLOY_ANONYMOUS_VARIABLE(str) MMDEPLOY_PP_CONCAT(str, __LINE__) #endif +#define MMDEPLOY_PP_NARG(...) _MMDEPLOY_PP_NARG(__VA_ARGS__, _MMDEPLOY_PP_RESQ_N()) + +#define _MMDEPLOY_PP_NARG(...) _MMDEPLOY_PP_ARG_N(__VA_ARGS__) + +#define _MMDEPLOY_PP_ARG_N(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, \ + _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, \ + _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, \ + _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, \ + _59, _60, _61, _62, _63, N, ...) \ + N + +#define _MMDEPLOY_PP_RESQ_N() \ + 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, \ + 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, \ + 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +#define MMDEPLOY_PP_MAP_1(f, x) f(x) +#define MMDEPLOY_PP_MAP_2(f, x, ...) f(x), MMDEPLOY_PP_MAP_1(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_3(f, x, ...) f(x), MMDEPLOY_PP_MAP_2(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_4(f, x, ...) f(x), MMDEPLOY_PP_MAP_3(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_5(f, x, ...) 
f(x), MMDEPLOY_PP_MAP_4(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_6(f, x, ...) f(x), MMDEPLOY_PP_MAP_5(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_7(f, x, ...) f(x), MMDEPLOY_PP_MAP_6(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_8(f, x, ...) f(x), MMDEPLOY_PP_MAP_7(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_9(f, x, ...) f(x), MMDEPLOY_PP_MAP_8(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_10(f, x, ...) f(x), MMDEPLOY_PP_MAP_9(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_11(f, x, ...) f(x), MMDEPLOY_PP_MAP_10(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_12(f, x, ...) f(x), MMDEPLOY_PP_MAP_11(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_13(f, x, ...) f(x), MMDEPLOY_PP_MAP_12(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_14(f, x, ...) f(x), MMDEPLOY_PP_MAP_13(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_15(f, x, ...) f(x), MMDEPLOY_PP_MAP_14(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_16(f, x, ...) f(x), MMDEPLOY_PP_MAP_15(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_17(f, x, ...) f(x), MMDEPLOY_PP_MAP_16(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_18(f, x, ...) f(x), MMDEPLOY_PP_MAP_17(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_19(f, x, ...) f(x), MMDEPLOY_PP_MAP_18(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_20(f, x, ...) f(x), MMDEPLOY_PP_MAP_19(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_21(f, x, ...) f(x), MMDEPLOY_PP_MAP_20(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_22(f, x, ...) f(x), MMDEPLOY_PP_MAP_21(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_23(f, x, ...) f(x), MMDEPLOY_PP_MAP_22(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_24(f, x, ...) f(x), MMDEPLOY_PP_MAP_23(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_25(f, x, ...) f(x), MMDEPLOY_PP_MAP_24(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_26(f, x, ...) f(x), MMDEPLOY_PP_MAP_25(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_27(f, x, ...) f(x), MMDEPLOY_PP_MAP_26(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_28(f, x, ...) f(x), MMDEPLOY_PP_MAP_27(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_29(f, x, ...) f(x), MMDEPLOY_PP_MAP_28(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_30(f, x, ...) f(x), MMDEPLOY_PP_MAP_29(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_31(f, x, ...) f(x), MMDEPLOY_PP_MAP_30(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_32(f, x, ...) f(x), MMDEPLOY_PP_MAP_31(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_33(f, x, ...) f(x), MMDEPLOY_PP_MAP_32(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_34(f, x, ...) f(x), MMDEPLOY_PP_MAP_33(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_35(f, x, ...) f(x), MMDEPLOY_PP_MAP_34(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_36(f, x, ...) f(x), MMDEPLOY_PP_MAP_35(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_37(f, x, ...) f(x), MMDEPLOY_PP_MAP_36(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_38(f, x, ...) f(x), MMDEPLOY_PP_MAP_37(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_39(f, x, ...) f(x), MMDEPLOY_PP_MAP_38(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_40(f, x, ...) f(x), MMDEPLOY_PP_MAP_39(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_41(f, x, ...) f(x), MMDEPLOY_PP_MAP_40(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_42(f, x, ...) f(x), MMDEPLOY_PP_MAP_41(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_43(f, x, ...) f(x), MMDEPLOY_PP_MAP_42(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_44(f, x, ...) f(x), MMDEPLOY_PP_MAP_43(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_45(f, x, ...) f(x), MMDEPLOY_PP_MAP_44(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_46(f, x, ...) f(x), MMDEPLOY_PP_MAP_45(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_47(f, x, ...) f(x), MMDEPLOY_PP_MAP_46(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_48(f, x, ...) f(x), MMDEPLOY_PP_MAP_47(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_49(f, x, ...) 
f(x), MMDEPLOY_PP_MAP_48(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_50(f, x, ...) f(x), MMDEPLOY_PP_MAP_49(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_51(f, x, ...) f(x), MMDEPLOY_PP_MAP_50(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_52(f, x, ...) f(x), MMDEPLOY_PP_MAP_51(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_53(f, x, ...) f(x), MMDEPLOY_PP_MAP_52(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_54(f, x, ...) f(x), MMDEPLOY_PP_MAP_53(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_55(f, x, ...) f(x), MMDEPLOY_PP_MAP_54(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_56(f, x, ...) f(x), MMDEPLOY_PP_MAP_55(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_57(f, x, ...) f(x), MMDEPLOY_PP_MAP_56(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_58(f, x, ...) f(x), MMDEPLOY_PP_MAP_57(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_59(f, x, ...) f(x), MMDEPLOY_PP_MAP_58(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_60(f, x, ...) f(x), MMDEPLOY_PP_MAP_59(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_61(f, x, ...) f(x), MMDEPLOY_PP_MAP_60(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_62(f, x, ...) f(x), MMDEPLOY_PP_MAP_61(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_63(f, x, ...) f(x), MMDEPLOY_PP_MAP_62(f, __VA_ARGS__) +#define MMDEPLOY_PP_MAP_64(f, x, ...) f(x), MMDEPLOY_PP_MAP_63(f, __VA_ARGS__) + +#define MMDEPLOY_PP_MAP(f, ...) \ + _MMDEPLOY_PP_MAP_IMPL1(f, MMDEPLOY_PP_NARG(__VA_ARGS__), __VA_ARGS__) + +#define _MMDEPLOY_PP_MAP_IMPL1(f, n, ...) \ + _MMDEPLOY_PP_MAP_IMPL2(f, MMDEPLOY_PP_CONCAT(MMDEPLOY_PP_MAP_, n), __VA_ARGS__) + +#define _MMDEPLOY_PP_MAP_IMPL2(f, M_, ...) M_(f, __VA_ARGS__) + #endif // MMDEPLOY_SRC_CORE_MARCO_H_ diff --git a/csrc/core/mat.h b/csrc/core/mat.h index d6ec4ba206..4702df7e9e 100644 --- a/csrc/core/mat.h +++ b/csrc/core/mat.h @@ -11,7 +11,7 @@ namespace mmdeploy { -class Mat final { +class MMDEPLOY_API Mat final { public: Mat() = default; diff --git a/csrc/core/model.cpp b/csrc/core/model.cpp index 080504a2a6..d4b6361a91 100644 --- a/csrc/core/model.cpp +++ b/csrc/core/model.cpp @@ -4,14 +4,7 @@ #include "core/logger.h" #include "core/model_impl.h" - -#if __GNUC__ >= 8 -#include -namespace fs = std::filesystem; -#else -#include -namespace fs = std::experimental::filesystem; -#endif +#include "core/utils/filesystem.h" using namespace std; @@ -19,7 +12,7 @@ namespace mmdeploy { Model::Model(const std::string& model_path) { if (auto r = Model::Init(model_path); !r) { - ERROR("load model failed. Its file path is '{}'", model_path); + MMDEPLOY_ERROR("load model failed. 
Its file path is '{}'", model_path); r.error().throw_exception(); } } @@ -28,7 +21,7 @@ Model::Model(const void* buffer, size_t size) { Init(buffer, size).value(); } Result Model::Init(const std::string& model_path) { if (!fs::exists(model_path)) { - ERROR("'{}' doesn't exist", model_path); + MMDEPLOY_ERROR("'{}' doesn't exist", model_path); return Status(eFileNotExist); } @@ -42,13 +35,13 @@ Result Model::Init(const std::string& model_path) { } OUTCOME_TRY(auto meta, impl->ReadMeta()); - INFO("{} successfully load sdk model {}", entry.name, model_path); + MMDEPLOY_INFO("{} successfully load sdk model {}", entry.name, model_path); impl_ = std::move(impl); meta_ = std::move(meta); return success(); } - ERROR("no ModelImpl can read sdk_model {}", model_path); + MMDEPLOY_ERROR("no ModelImpl can read sdk_model {}", model_path); return Status(eNotSupported); } @@ -63,13 +56,13 @@ Result Model::Init(const void* buffer, size_t size) { } OUTCOME_TRY(auto meta, impl->ReadMeta()); - INFO("{} successfully load sdk model {}", entry.name); + MMDEPLOY_INFO("{} successfully load sdk model {}", entry.name); impl_ = std::move(impl); meta_ = std::move(meta); return success(); } - ERROR("no ModelImpl can parse buffer"); + MMDEPLOY_ERROR("no ModelImpl can parse buffer"); return Status(eNotSupported); } @@ -79,7 +72,7 @@ Result Model::GetModelConfig(const std::string& name) const { return info; } } - ERROR("cannot find model '{}' in meta file", name); + MMDEPLOY_ERROR("cannot find model '{}' in meta file", name); return Status(eEntryNotFound); } @@ -87,14 +80,19 @@ Result Model::ReadFile(const std::string& file_path) noexcept { return impl_->ReadFile(file_path); } +ModelRegistry& ModelRegistry::Get() { + static ModelRegistry inst; + return inst; +} + Result ModelRegistry::Register(const std::string& name, Creator creator) { for (auto& entry : entries_) { if (entry.name == name) { - ERROR("{} is already registered", name); + MMDEPLOY_ERROR("{} is already registered", name); return Status(eFail); } } - INFO("Register '{}'", name); + MMDEPLOY_INFO("Register '{}'", name); entries_.push_back({name, std::move(creator)}); return success(); } diff --git a/csrc/core/model.h b/csrc/core/model.h index a9ce11eff3..5193128c31 100644 --- a/csrc/core/model.h +++ b/csrc/core/model.h @@ -39,7 +39,7 @@ class ModelImpl; * in case of faster-rcnn model, it splits into two models, one is rpn and the * other is cnn for roi classification. 
*/ -class Model { +class MMDEPLOY_API Model { public: Model() = default; @@ -115,7 +115,7 @@ class Model { * }; * ANewModelImplRegister a_new_model_impl_register; */ -class ModelRegistry { +class MMDEPLOY_API ModelRegistry { public: using Creator = std::function<std::unique_ptr<ModelImpl>()>; struct Entry { @@ -126,10 +126,7 @@ class ModelRegistry { /** * @brief Return global instance of `ModelRegistry` */ - static ModelRegistry& Get() { - static ModelRegistry inst; - return inst; - } + static ModelRegistry& Get(); /** * @brief Register an sdk model format denoted by a specified `ModelImpl` diff --git a/csrc/core/module.cpp b/csrc/core/module.cpp index d21ea31bdc..28857f4206 100644 --- a/csrc/core/module.cpp +++ b/csrc/core/module.cpp @@ -6,7 +6,9 @@ namespace mmdeploy { -template class Registry<Module>; -template class Creator<Module>; +// template class Registry<Module>; +// template class Creator<Module>; + +MMDEPLOY_DEFINE_REGISTRY(Module); } // namespace mmdeploy diff --git a/csrc/core/module.h b/csrc/core/module.h index 96d0c5cffa..6debc6a6dd 100644 --- a/csrc/core/module.h +++ b/csrc/core/module.h @@ -4,17 +4,20 @@ #define MMDEPLOY_SRC_CORE_MODULE_H_ #include "core/macro.h" +#include "core/registry.h" #include "core/status_code.h" #include "core/value.h" namespace mmdeploy { -class MM_SDK_API Module { +class MMDEPLOY_API Module { public: virtual ~Module() = default; virtual Result<Value> Process(const Value& args) = 0; }; +MMDEPLOY_DECLARE_REGISTRY(Module); + } // namespace mmdeploy #endif // MMDEPLOY_SRC_CORE_MODULE_H_ diff --git a/csrc/core/net.cpp b/csrc/core/net.cpp index f548042507..9f057dc88e 100644 --- a/csrc/core/net.cpp +++ b/csrc/core/net.cpp @@ -6,7 +6,9 @@ namespace mmdeploy { -template class Registry<Net>; -template class Creator<Net>; +// template class Registry<Net>; +// template class Creator<Net>; + +MMDEPLOY_DEFINE_REGISTRY(Net); } // namespace mmdeploy diff --git a/csrc/core/net.h b/csrc/core/net.h index b96551cf90..c49a7ceeb7 100644 --- a/csrc/core/net.h +++ b/csrc/core/net.h @@ -22,6 +22,8 @@ class Net { virtual Result<void> ForwardAsync(Event* event) = 0; }; +MMDEPLOY_DECLARE_REGISTRY(Net); + } // namespace mmdeploy #endif // MMDEPLOY_SRC_CORE_NET_H_ diff --git a/csrc/core/operator.cpp b/csrc/core/operator.cpp index c40bfee508..e4e536b2c5 100644 --- a/csrc/core/operator.cpp +++ b/csrc/core/operator.cpp @@ -2,6 +2,8 @@ #include "operator.h" +#include + namespace mmdeploy::graph { Result<void> Gather(const Value::Array& array, const vector<int>& idxs, Value::Array& output) { diff --git a/csrc/core/operator.h b/csrc/core/operator.h index 0936fbfaf3..c71c62f051 100644 --- a/csrc/core/operator.h +++ b/csrc/core/operator.h @@ -11,13 +11,18 @@ using std::string; using std::tuple; using std::vector; -Result<void> Gather(const Value::Array& array, const vector<int>& idxs, Value::Array& output); -Result<void> Gather(Value::Array&& array, const vector<int>& idxs, Value::Array& output); -Result<void> Gather(const Value::Object& object, const vector<string>& keys, - Value::Array& output); -Result<void> Gather(Value::Object&& object, const vector<string>& keys, Value::Array& output); -Result<void> Scatter(Value::Array array, const vector<int>& idxs, Value::Array& output); -Result<void> Scatter(Value::Array array, const vector<string>& keys, Value::Object& output); +MMDEPLOY_API Result<void> Gather(const Value::Array& array, const vector<int>& idxs, + Value::Array& output); +MMDEPLOY_API Result<void> Gather(Value::Array&& array, const vector<int>& idxs, + Value::Array& output); +MMDEPLOY_API Result<void> Gather(const Value::Object& object, const vector<string>& keys, + Value::Array& output); +MMDEPLOY_API Result<void> Gather(Value::Object&& object, const vector<string>& keys, + Value::Array&
output); +MMDEPLOY_API Result<void> Scatter(Value::Array array, const vector<int>& idxs, + Value::Array& output); +MMDEPLOY_API Result<void> Scatter(Value::Array array, const vector<string>& keys, + Value::Object& output); inline Result<Value::Array> Gather(const Value::Array& array, const vector<int>& idxs) { Value::Array output; @@ -95,13 +100,13 @@ Result<Value> Unflatten(V&& input, const vector<int>& idxs) { } // object of arrays -> array of objects, all arrays must be of same length -Result<Value> DistribOA(const Value& oa); +MMDEPLOY_API Result<Value> DistribOA(const Value& oa); // array of objects -> object of arrays, all objects must be isomorphic -Result<Value> DistribAO(const Value& ao); +MMDEPLOY_API Result<Value> DistribAO(const Value& ao); // array of arrays -> array of arrays, this is equivalent to transpose -Result<Value> DistribAA(const Value& a); +MMDEPLOY_API Result<Value> DistribAA(const Value& a); } // namespace mmdeploy::graph diff --git a/csrc/core/registry.cpp b/csrc/core/registry.cpp new file mode 100644 index 0000000000..d0d543ee5d --- /dev/null +++ b/csrc/core/registry.cpp @@ -0,0 +1,46 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#include "core/registry.h" + +namespace mmdeploy { + +Registry<void>::Registry() = default; + +Registry<void>::~Registry() = default; + +bool Registry<void>::AddCreator(Creator<void> &creator) { + MMDEPLOY_DEBUG("Adding creator: {}", creator.GetName()); + auto key = creator.GetName(); + if (entries_.find(key) == entries_.end()) { + entries_.insert(std::make_pair(key, &creator)); + return true; + } + + for (auto iter = entries_.lower_bound(key); iter != entries_.upper_bound(key); ++iter) { + if (iter->second->GetVersion() == creator.GetVersion()) { + return false; + } + } + + entries_.insert(std::make_pair(key, &creator)); + return true; +} + +Creator<void> *Registry<void>::GetCreator(const std::string &type, int version) { + auto iter = entries_.find(type); + if (iter == entries_.end()) { + return nullptr; + } + if (0 == version) { + return iter->second; + } + + for (auto iter = entries_.lower_bound(type); iter != entries_.upper_bound(type); ++iter) { + if (iter->second->GetVersion() == version) { + return iter->second; + } + } + return nullptr; +} + +} // namespace mmdeploy diff --git a/csrc/core/registry.h b/csrc/core/registry.h index 03d5e3f234..bde878a35c 100644 --- a/csrc/core/registry.h +++ b/csrc/core/registry.h @@ -9,6 +9,7 @@ #include #include +#include "macro.h" #include "value.h" namespace mmdeploy { @@ -30,73 +31,58 @@ using get_return_type_t = typename get_return_type<T>::type; } // namespace detail +template <typename EntryType> +class Creator; + +template <> +class Creator<void> { + public: + virtual ~Creator() = default; + virtual const char *GetName() const = 0; + virtual int GetVersion() const { return 0; } +}; + template <typename EntryType> -class Creator { +class Creator : public Creator<void> { public: using ReturnType = detail::get_return_type_t<EntryType>; public: - virtual ~Creator() = default; - virtual const char *GetName() const = 0; - virtual int GetVersion() const = 0; virtual ReturnType Create(const Value &args) = 0; }; -template <typename EntryType> -class Registry { +template <typename EntryType> +class Registry; + +template <> +class MMDEPLOY_API Registry<void> { public: - static Registry &Get() { - static Registry registry; - return registry; - } + Registry(); - bool AddCreator(Creator<EntryType> &creator) { - auto key = creator.GetName(); - if (entries_.find(key) == entries_.end()) { - entries_.insert(std::make_pair(key, &creator)); - return true; - } - - for (auto iter = entries_.lower_bound(key); iter != entries_.upper_bound(key); ++iter) { - if (iter->second->GetVersion() == creator.GetVersion()) { - return false; - } - } - - entries_.insert(std::make_pair(key, &creator)); - return true; - } + ~Registry(); + + bool AddCreator(Creator<void> &creator); + + Creator<void> *GetCreator(const std::string &type, int version = 0); + + private: + std::multimap<std::string, Creator<void> *> entries_; +}; + +template <typename EntryType> +class Registry : public Registry<void> { + public: + bool AddCreator(Creator<EntryType> &creator) { return Registry<void>::AddCreator(creator); } Creator<EntryType> *GetCreator(const std::string &type, int version = 0) { - auto iter = entries_.find(type); - if (iter == entries_.end()) { - return nullptr; - } - if (0 == version) { - return iter->second; - } - - for (auto iter = entries_.lower_bound(type); iter != entries_.upper_bound(type); ++iter) { - if (iter->second->GetVersion() == version) { - return iter->second; - } - } - return nullptr; + auto creator = Registry<void>::GetCreator(type, version); + return static_cast<Creator<EntryType> *>(creator); } - std::vector<std::string> ListCreators() { - std::vector<std::string> keys; - for (const auto &[key, _] : entries_) { - keys.push_back(key); - } - return keys; - } + static Registry &Get(); private: Registry() = default; - - private: - std::multimap<std::string, Creator<EntryType> *> entries_; }; template <typename CreatorType> @@ -110,6 +96,17 @@ class Registerer { } // namespace mmdeploy +#define MMDEPLOY_DECLARE_REGISTRY(EntryType) \ + template <> \ + Registry<EntryType> &Registry<EntryType>::Get(); + +#define MMDEPLOY_DEFINE_REGISTRY(EntryType) \ + template <> \ + MMDEPLOY_EXPORT Registry<EntryType> &Registry<EntryType>::Get() { \ + static Registry v; \ + return v; \ + } + #define REGISTER_MODULE(EntryType, CreatorType) \ static ::mmdeploy::Registerer<CreatorType> g_register_##EntryType##_##CreatorType{}; diff --git a/csrc/core/serialization.h b/csrc/core/serialization.h index aeea43bb50..6a37d8c3ff 100644 --- a/csrc/core/serialization.h +++ b/csrc/core/serialization.h @@ -8,47 +8,14 @@ #include #include +#include "core/macro.h" #include "core/status_code.h" #include "mpl/detected.h" #include "mpl/type_traits.h" namespace mmdeploy { -#define _MMDEPLOY_NTH_ARG(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, \ N, ...) \ N - -#define _MMDEPLOY_ARCHIVE_1(x) MMDEPLOY_NVP(x) -#define _MMDEPLOY_ARCHIVE_2(x, ...) MMDEPLOY_NVP(x), _MMDEPLOY_ARCHIVE_1(__VA_ARGS__) -#define _MMDEPLOY_ARCHIVE_3(x, ...) MMDEPLOY_NVP(x), _MMDEPLOY_ARCHIVE_2(__VA_ARGS__) -#define _MMDEPLOY_ARCHIVE_4(x, ...) MMDEPLOY_NVP(x), _MMDEPLOY_ARCHIVE_3(__VA_ARGS__) -#define _MMDEPLOY_ARCHIVE_5(x, ...) MMDEPLOY_NVP(x), _MMDEPLOY_ARCHIVE_4(__VA_ARGS__) -#define _MMDEPLOY_ARCHIVE_6(x, ...) MMDEPLOY_NVP(x), _MMDEPLOY_ARCHIVE_5(__VA_ARGS__) -#define _MMDEPLOY_ARCHIVE_7(x, ...) MMDEPLOY_NVP(x), _MMDEPLOY_ARCHIVE_6(__VA_ARGS__) -#define _MMDEPLOY_ARCHIVE_8(x, ...) MMDEPLOY_NVP(x), _MMDEPLOY_ARCHIVE_7(__VA_ARGS__) -#define _MMDEPLOY_ARCHIVE_9(x, ...) MMDEPLOY_NVP(x), _MMDEPLOY_ARCHIVE_8(__VA_ARGS__) -#define _MMDEPLOY_ARCHIVE_10(x, ...) MMDEPLOY_NVP(x), _MMDEPLOY_ARCHIVE_9(__VA_ARGS__) -#define _MMDEPLOY_ARCHIVE_11(x, ...) MMDEPLOY_NVP(x), _MMDEPLOY_ARCHIVE_10(__VA_ARGS__) -#define _MMDEPLOY_ARCHIVE_12(x, ...) MMDEPLOY_NVP(x), _MMDEPLOY_ARCHIVE_11(__VA_ARGS__) -#define _MMDEPLOY_ARCHIVE_13(x, ...) MMDEPLOY_NVP(x), _MMDEPLOY_ARCHIVE_12(__VA_ARGS__) -#define _MMDEPLOY_ARCHIVE_14(x, ...) MMDEPLOY_NVP(x), _MMDEPLOY_ARCHIVE_13(__VA_ARGS__) -#define _MMDEPLOY_ARCHIVE_15(x, ...) MMDEPLOY_NVP(x), _MMDEPLOY_ARCHIVE_14(__VA_ARGS__) -#define _MMDEPLOY_ARCHIVE_16(x, ...) MMDEPLOY_NVP(x), _MMDEPLOY_ARCHIVE_15(__VA_ARGS__) - -#define _MMDEPLOY_ARCHIVE_DISPATCH(...)
\ - _MMDEPLOY_NTH_ARG(__VA_ARGS__, _MMDEPLOY_ARCHIVE_16(__VA_ARGS__), \ - _MMDEPLOY_ARCHIVE_15(__VA_ARGS__), _MMDEPLOY_ARCHIVE_14(__VA_ARGS__), \ - _MMDEPLOY_ARCHIVE_13(__VA_ARGS__), _MMDEPLOY_ARCHIVE_12(__VA_ARGS__), \ - _MMDEPLOY_ARCHIVE_11(__VA_ARGS__), _MMDEPLOY_ARCHIVE_10(__VA_ARGS__), \ - _MMDEPLOY_ARCHIVE_9(__VA_ARGS__), _MMDEPLOY_ARCHIVE_8(__VA_ARGS__), \ - _MMDEPLOY_ARCHIVE_7(__VA_ARGS__), _MMDEPLOY_ARCHIVE_6(__VA_ARGS__), \ - _MMDEPLOY_ARCHIVE_5(__VA_ARGS__), _MMDEPLOY_ARCHIVE_4(__VA_ARGS__), \ - _MMDEPLOY_ARCHIVE_3(__VA_ARGS__), _MMDEPLOY_ARCHIVE_2(__VA_ARGS__), \ - _MMDEPLOY_ARCHIVE_1(__VA_ARGS__)) - -#define MMDEPLOY_ARCHIVE_NVP(archive, ...) archive(_MMDEPLOY_ARCHIVE_DISPATCH(__VA_ARGS__)) - -#define MMDEPLOY_ARCHIVE(archive, ...) archive(__VA_ARGS__) +#define MMDEPLOY_ARCHIVE_NVP(archive, ...) archive(MMDEPLOY_PP_MAP(MMDEPLOY_NVP, __VA_ARGS__)) #define MMDEPLOY_ARCHIVE_MEMBERS(...) \ template \ @@ -273,32 +240,26 @@ void load(Archive &&archive, T &&object) { } } -template -using save_t = decltype(save(std::declval(), std::declval())); - struct save_fn { template - auto operator()(Archive &&a, T &&v) const -> save_t { + auto operator()(Archive &&a, T &&v) const + -> decltype(save(std::forward(a), std::forward(v))) { return save(std::forward(a), std::forward(v)); } }; -template -using load_t = decltype(load(std::declval(), std::declval())); - struct load_fn { template - auto operator()(Archive &&a, T &&v) const -> load_t { + auto operator()(Archive &&a, T &&v) const + -> decltype(load(std::forward(a), std::forward(v))) { return load(std::forward(a), std::forward(v)); } }; -template -using serialize_t = decltype(serialize(std::declval(), std::declval())); - struct serialize_fn { template - auto operator()(Archive &&a, T &&v) const -> serialize_t { + auto operator()(Archive &&a, T &&v) const + -> decltype(serialize(std::forward(a), std::forward(v))) { return serialize(std::forward(a), std::forward(v)); } }; @@ -319,15 +280,18 @@ struct adl_serializer; template struct adl_serializer { template - static auto save(Archive &&a, T &&v) -> detail::save_t { + static auto save(Archive &&a, T &&v) + -> decltype(::mmdeploy::save(std::forward(a), std::forward(v))) { ::mmdeploy::save(std::forward(a), std::forward(v)); } template - static auto load(Archive &&a, T &&v) -> detail::load_t { + static auto load(Archive &&a, T &&v) + -> decltype(::mmdeploy::load(std::forward(a), std::forward(v))) { ::mmdeploy::load(std::forward(a), std::forward(v)); } template - static auto serialize(Archive &&a, T &&v) -> detail::serialize_t { + static auto serialize(Archive &&a, T &&v) + -> decltype(::mmdeploy::serialize(std::forward(a), std::forward(v))) { ::mmdeploy::serialize(std::forward(a), std::forward(v)); } }; diff --git a/csrc/core/status_code.h b/csrc/core/status_code.h index 3b719e677a..f84ce8a886 100644 --- a/csrc/core/status_code.h +++ b/csrc/core/status_code.h @@ -5,6 +5,7 @@ #include +#include "core/macro.h" #include "outcome-experimental.hpp" #if MMDEPLOY_STATUS_USE_SOURCE_LOCATION #include "utils/source_location.h" @@ -71,7 +72,7 @@ inline const char *to_string(ErrorCode code) { } } -struct Status { +struct MMDEPLOY_API Status { ErrorCode ec{}; Status() = default; SYSTEM_ERROR2_NAMESPACE::status_code_domain::string_ref message() const; @@ -94,7 +95,7 @@ class StatusDomain; using StatusCode = SYSTEM_ERROR2_NAMESPACE::status_code; -class StatusDomain : public SYSTEM_ERROR2_NAMESPACE::status_code_domain { +class MMDEPLOY_API StatusDomain : public SYSTEM_ERROR2_NAMESPACE::status_code_domain { using 
_base = status_code_domain; public: diff --git a/csrc/core/tensor.cpp b/csrc/core/tensor.cpp index aed5d6c3e0..6a040ce046 100644 --- a/csrc/core/tensor.cpp +++ b/csrc/core/tensor.cpp @@ -87,16 +87,16 @@ void Tensor::Reshape(const TensorShape& shape) { Result Tensor::CopyFrom(const Tensor& tensor, Stream stream) { if (desc_.shape.empty() || tensor.desc().shape.empty()) { - ERROR("uninitialized tensor"); + MMDEPLOY_ERROR("uninitialized tensor"); return Status(eInvalidArgument); } if (!(desc_.shape == tensor.desc().shape)) { - ERROR("mismatched shape {} vs {}", shape_string(desc_.shape), - shape_string(tensor.desc().shape)); + MMDEPLOY_ERROR("mismatched shape {} vs {}", shape_string(desc_.shape), + shape_string(tensor.desc().shape)); return Status(eShapeMismatch); } if (desc_.data_type != tensor.desc().data_type) { - ERROR("mismatched data type {} vs {}", desc_.data_type, tensor.desc().data_type); + MMDEPLOY_ERROR("mismatched data type {} vs {}", desc_.data_type, tensor.desc().data_type); return Status(eShapeMismatch); } Allocate(); @@ -112,17 +112,17 @@ Result Tensor::CopyFrom(const Tensor& tensor, Stream stream) { Result Tensor::CopyTo(Tensor& tensor, Stream stream) const { if (desc_.shape.empty() || tensor.desc().shape.empty()) { - ERROR("uninitialized tensor"); + MMDEPLOY_ERROR("uninitialized tensor"); return Status(eInvalidArgument); } if (!(desc_.shape == tensor.desc().shape)) { - ERROR("mismatched shape {} vs {}", shape_string(desc_.shape), - shape_string(tensor.desc().shape)); + MMDEPLOY_ERROR("mismatched shape {} vs {}", shape_string(desc_.shape), + shape_string(tensor.desc().shape)); return Status(eShapeMismatch); } if (desc_.data_type != tensor.desc().data_type) { - ERROR("mismatched data type {} vs {}", desc_.data_type, tensor.desc().data_type); + MMDEPLOY_ERROR("mismatched data type {} vs {}", desc_.data_type, tensor.desc().data_type); return Status(eShapeMismatch); } tensor.Allocate(); @@ -140,7 +140,7 @@ Result Tensor::CopyFrom(void* host_ptr, Stream stream) { return Status(eInvalidArgument); } if (desc_.shape.empty()) { - ERROR("uninitialized tensor"); + MMDEPLOY_ERROR("uninitialized tensor"); return Status(eInvalidArgument); } Allocate(); @@ -157,7 +157,7 @@ Result Tensor::CopyTo(void* host_ptr, Stream stream) const { return Status(eInvalidArgument); } if (desc_.shape.empty()) { - ERROR("uninitialized tensor"); + MMDEPLOY_ERROR("uninitialized tensor"); return Status(eInvalidArgument); } if (!stream) { diff --git a/csrc/core/tensor.h b/csrc/core/tensor.h index 264c6d7b84..78be82ad39 100644 --- a/csrc/core/tensor.h +++ b/csrc/core/tensor.h @@ -19,7 +19,7 @@ struct TensorDesc { std::string name; }; -class Tensor { +class MMDEPLOY_API Tensor { public: Tensor() = default; Tensor(const Tensor&) = default; diff --git a/csrc/core/utils/device_utils.h b/csrc/core/utils/device_utils.h index 81621efecc..65422664e8 100644 --- a/csrc/core/utils/device_utils.h +++ b/csrc/core/utils/device_utils.h @@ -14,7 +14,8 @@ namespace mmdeploy { * @param stream * @return */ -Result MakeAvailableOnDevice(const Mat& src, const Device& device, Stream& stream); +MMDEPLOY_API Result MakeAvailableOnDevice(const Mat& src, const Device& device, + Stream& stream); /** * @@ -23,7 +24,8 @@ Result MakeAvailableOnDevice(const Mat& src, const Device& device, Stream& * @param stream * @return */ -Result MakeAvailableOnDevice(const Tensor& src, const Device& device, Stream& stream); +MMDEPLOY_API Result MakeAvailableOnDevice(const Tensor& src, const Device& device, + Stream& stream); } // namespace mmdeploy #endif 
// MMDEPLOY_TRANSFORM_UTILS_H diff --git a/csrc/core/utils/filesystem.h b/csrc/core/utils/filesystem.h new file mode 100644 index 0000000000..7aca6a8d8e --- /dev/null +++ b/csrc/core/utils/filesystem.h @@ -0,0 +1,15 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#ifndef MMDEPLOY_CSRC_CORE_UTILS_FILESYSTEM_H_ +#define MMDEPLOY_CSRC_CORE_UTILS_FILESYSTEM_H_ + +// TODO: what about clang? +#if __GNUC__ >= 8 || _MSC_VER +#include <filesystem> +namespace fs = std::filesystem; +#else +#include <experimental/filesystem> +namespace fs = std::experimental::filesystem; +#endif + +#endif // MMDEPLOY_CSRC_CORE_UTILS_FILESYSTEM_H_ diff --git a/csrc/core/utils/formatter.h b/csrc/core/utils/formatter.h index 14075b3866..af28f8c9c0 100644 --- a/csrc/core/utils/formatter.h +++ b/csrc/core/utils/formatter.h @@ -13,7 +13,7 @@ namespace mmdeploy { class Value; -std::string format_value(const Value& value); +MMDEPLOY_API std::string format_value(const Value& value); } // namespace mmdeploy diff --git a/csrc/core/utils/source_location.h b/csrc/core/utils/source_location.h index b7362cc12b..f0d579b76b 100644 --- a/csrc/core/utils/source_location.h +++ b/csrc/core/utils/source_location.h @@ -3,7 +3,7 @@ #ifndef MMDEPLOY_SRC_UTILS_SOURCE_LOCATION_H_ #define MMDEPLOY_SRC_UTILS_SOURCE_LOCATION_H_ -#if __has_include(<source_location>) +#if __has_include(<source_location>) && !_MSC_VER #include <source_location> namespace mmdeploy { using SourceLocation = std::source_location; diff --git a/csrc/core/utils/stacktrace.h b/csrc/core/utils/stacktrace.h index 1c1cacbb7d..53b1a44b99 100644 --- a/csrc/core/utils/stacktrace.h +++ b/csrc/core/utils/stacktrace.h @@ -4,6 +4,7 @@ #define MMDEPLOY_SRC_CORE_STACKTRACE_H_ #include +#include namespace mmdeploy { diff --git a/csrc/core/value.h b/csrc/core/value.h index b73fba4302..3241330565 100644 --- a/csrc/core/value.h +++ b/csrc/core/value.h @@ -650,22 +650,22 @@ class Value { template <typename Key> bool contains(Key&& key) const { - return _unwrap().template _contains(std::forward<Key>(key)); + return _unwrap()._contains(std::forward<Key>(key)); } template <typename Key> iterator find(Key&& key) { - return _unwrap().template _find(std::forward<Key>(key)); + return _unwrap()._find(std::forward<Key>(key)); } template <typename Key> const_iterator find(Key&& key) const { - return _unwrap().template _find(std::forward<Key>(key)); + return _unwrap()._find(std::forward<Key>(key)); } template <typename T> T value(const typename Object::key_type& key, const T& default_value) const { - return _unwrap().template _value(key, default_value); + return _unwrap()._value(key, default_value); } iterator begin() { return _unwrap()._begin(); } diff --git a/csrc/device/cpu/CMakeLists.txt b/csrc/device/cpu/CMakeLists.txt index 226d0894bf..f7e7c46117 100644 --- a/csrc/device/cpu/CMakeLists.txt +++ b/csrc/device/cpu/CMakeLists.txt @@ -2,10 +2,14 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_cpu_device) -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) file(GLOB_RECURSE SRCS "*.cpp") -build_target(${PROJECT_NAME} "${SRCS}") -target_link_libraries(${PROJECT_NAME} PUBLIC pthread PRIVATE mmdeploy::core) + +mmdeploy_add_module(${PROJECT_NAME} "${SRCS}") + +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) +target_link_libraries(${PROJECT_NAME} PRIVATE Threads::Threads) + add_library(mmdeploy::device::cpu ALIAS ${PROJECT_NAME}) -export_module(${PROJECT_NAME}) diff --git a/csrc/device/cuda/CMakeLists.txt b/csrc/device/cuda/CMakeLists.txt index 6a36e513bd..1ac67bb8b8 100644 --- a/csrc/device/cuda/CMakeLists.txt +++ b/csrc/device/cuda/CMakeLists.txt @@ -9,17 +9,13 @@ if (${CMAKE_VERSION}
VERSION_GREATER_EQUAL "3.18.0") cmake_policy(SET CMP0104 OLD) endif () -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) -set_targets(${PROJECT_NAME} CUDA_DEVICE_OBJ CUDA_DEVICE_STATIC CUDA_DEVICE_SHARED) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) set(SRCS cuda_device.cpp cuda_builtin_kernels.cu) -build_target(${PROJECT_NAME} "${SRCS}") +mmdeploy_add_module(${PROJECT_NAME} "${SRCS}") target_include_directories(${PROJECT_NAME} PUBLIC ${CUDA_INCLUDE_DIRS}) target_link_directories(${PROJECT_NAME} PUBLIC ${CUDA_TOOLKIT_ROOT_DIR}/lib64) -target_link_libraries(${PROJECT_NAME} - PRIVATE mmdeploy::core - PUBLIC cudart cuda) +target_link_libraries(${PROJECT_NAME} PRIVATE cudart cuda) add_library(mmdeploy::device::cuda ALIAS ${PROJECT_NAME}) -export_module(${PROJECT_NAME}) diff --git a/csrc/device/cuda/buddy_allocator.h b/csrc/device/cuda/buddy_allocator.h index f94ec1c046..3e26a0edff 100644 --- a/csrc/device/cuda/buddy_allocator.h +++ b/csrc/device/cuda/buddy_allocator.h @@ -25,7 +25,7 @@ class BuddyAllocator { block_count_ = size / block_size_; if (!IsPowerOfTwo(block_count_)) { block_count_ = RoundToPowerOfTwo(block_count_); - WARN("Rounding up block_count to next power of 2 {}", block_count_); + MMDEPLOY_WARN("Rounding up block_count to next power of 2 {}", block_count_); } base_ = LogPowerOfTwo(block_count_); size_ = block_size_ * block_count_; @@ -34,17 +34,18 @@ class BuddyAllocator { free_.resize(base_ + 1); Build(1, 0); Add(1, 0); - ERROR("size = {}, block_size = {}, block_count = {}", size_, block_size_, block_count_); + MMDEPLOY_ERROR("size = {}, block_size = {}, block_count = {}", size_, block_size_, + block_count_); size = size_; for (int i = 0; i <= base_; ++i) { - ERROR("level {}, size = {}", i, size); + MMDEPLOY_ERROR("level {}, size = {}", i, size); size /= 2; } } ~BuddyAllocator() { for (int i = 0; i < free_.size(); ++i) { - ERROR("free_[{}].size(): {}", i, free_[i].size()); + MMDEPLOY_ERROR("free_[{}].size(): {}", i, free_[i].size()); } gDefaultAllocator().Deallocate(memory_, size_); } @@ -62,7 +63,7 @@ class BuddyAllocator { } } if (level < 0) { - WARN("failed to allocate memory size = {} bytes", n); + MMDEPLOY_WARN("failed to allocate memory size = {} bytes", n); return nullptr; } for (; level < n_level; ++level) { @@ -80,7 +81,7 @@ class BuddyAllocator { std::lock_guard lock{mutex_}; auto offset = static_cast(p) - static_cast(memory_); if (offset < 0 || offset % block_size_) { - ERROR("invalid address: {}", p); + MMDEPLOY_ERROR("invalid address: {}", p); } offset /= static_cast(block_size_); auto level = GetLevel(n); diff --git a/csrc/device/cuda/cuda_builtin_kernels.cu b/csrc/device/cuda/cuda_builtin_kernels.cu index c2cf5460cf..463da81369 100644 --- a/csrc/device/cuda/cuda_builtin_kernels.cu +++ b/csrc/device/cuda/cuda_builtin_kernels.cu @@ -1,5 +1,7 @@ // Copyright (c) OpenMMLab. All rights reserved. 
+#include + namespace mmdeploy { namespace cuda { @@ -17,8 +19,8 @@ __global__ void FillKernel(void* dst, size_t dst_size, const void* pattern, size int Fill(void* dst, size_t dst_size, const void* pattern, size_t pattern_size, cudaStream_t stream) { - const uint n_threads = 256; - const uint n_blocks = (dst_size + n_threads - 1) / n_threads; + const unsigned int n_threads = 256; + const unsigned int n_blocks = (dst_size + n_threads - 1) / n_threads; FillKernel<<>>(dst, dst_size, pattern, pattern_size); diff --git a/csrc/device/cuda/cuda_device.cpp b/csrc/device/cuda/cuda_device.cpp index 768a267d90..9825182e9e 100644 --- a/csrc/device/cuda/cuda_device.cpp +++ b/csrc/device/cuda/cuda_device.cpp @@ -79,9 +79,9 @@ Allocator CreateDefaultAllocator() { using namespace device_allocator; AllocatorImplPtr allocator = std::make_shared(); allocator = std::make_shared(allocator, "cudaMalloc"); - allocator = std::make_shared(allocator, -1, .0); + allocator = std::make_shared(allocator, -1, .5); allocator = std::make_shared(allocator, "Tree"); - INFO("Default CUDA allocator initialized"); + MMDEPLOY_INFO("Default CUDA allocator initialized"); return Access::create(allocator); } @@ -265,7 +265,7 @@ void CudaPlatformImpl::PerDeviceData::init() { CudaPlatformImpl::CudaPlatformImpl() { int count{}; if (auto err = cudaGetDeviceCount(&count); err != cudaSuccess) { - ERROR("error getting device count: {}", cudaGetErrorString(err)); + MMDEPLOY_ERROR("error getting device count: {}", cudaGetErrorString(err)); throw_exception(eFail); } per_device_data_storage_.reserve(count); diff --git a/csrc/device/cuda/cuda_device.h b/csrc/device/cuda/cuda_device.h index 71623d42b6..d4588d2fd9 100644 --- a/csrc/device/cuda/cuda_device.h +++ b/csrc/device/cuda/cuda_device.h @@ -16,6 +16,16 @@ class CudaPlatformImpl : public PlatformImpl { public: CudaPlatformImpl(); + ~CudaPlatformImpl() override { + // The CUDA driver may have already shutdown before the platform dtor is called. 
+ // As a workaround, simply leak per device resources and let the driver handle it + // FIXME: maybe a pair of global mmdeploy_init/deinit function would be a + // better solution + for (auto& data : per_device_data_storage_) { + data.release(); + } + } + const char* GetPlatformName() const noexcept override { return "cuda"; } shared_ptr CreateBuffer(Device device) override; diff --git a/csrc/device/cuda/default_allocator.h b/csrc/device/cuda/default_allocator.h index ca4d794e9d..a8b2177ccc 100644 --- a/csrc/device/cuda/default_allocator.h +++ b/csrc/device/cuda/default_allocator.h @@ -16,11 +16,11 @@ class DefaultAllocator { public: DefaultAllocator() = default; ~DefaultAllocator() { - ERROR("=== CUDA Default Allocator ==="); - ERROR(" Allocation: count={}, size={}MB, time={}ms", alloc_count_, - alloc_size_ / (1024 * 1024.f), alloc_time_ / 1000000.f); - ERROR("Deallocation: count={}, size={}MB, time={}ms", dealloc_count_, - dealloc_size_ / (1024 * 1024.f), dealloc_time_ / 1000000.f); + MMDEPLOY_ERROR("=== CUDA Default Allocator ==="); + MMDEPLOY_ERROR(" Allocation: count={}, size={}MB, time={}ms", alloc_count_, + alloc_size_ / (1024 * 1024.f), alloc_time_ / 1000000.f); + MMDEPLOY_ERROR("Deallocation: count={}, size={}MB, time={}ms", dealloc_count_, + dealloc_size_ / (1024 * 1024.f), dealloc_time_ / 1000000.f); } [[nodiscard]] void* Allocate(std::size_t n) { void* p{}; @@ -29,7 +29,7 @@ class DefaultAllocator { auto t1 = std::chrono::high_resolution_clock::now(); alloc_time_ += (int64_t)std::chrono::duration(t1 - t0).count(); if (ret != cudaSuccess) { - ERROR("error allocating cuda memory: {}", cudaGetErrorString(ret)); + MMDEPLOY_ERROR("error allocating cuda memory: {}", cudaGetErrorString(ret)); return nullptr; } alloc_count_ += 1; @@ -43,7 +43,7 @@ class DefaultAllocator { auto t1 = std::chrono::high_resolution_clock::now(); dealloc_time_ += (int64_t)std::chrono::duration(t1 - t0).count(); if (ret != cudaSuccess) { - ERROR("error deallocating cuda memory: {}", cudaGetErrorString(ret)); + MMDEPLOY_ERROR("error deallocating cuda memory: {}", cudaGetErrorString(ret)); return; } dealloc_count_ += 1; diff --git a/csrc/device/cuda/linear_allocator.h b/csrc/device/cuda/linear_allocator.h index 15be01bc36..59133e9332 100644 --- a/csrc/device/cuda/linear_allocator.h +++ b/csrc/device/cuda/linear_allocator.h @@ -25,11 +25,11 @@ class LinearAllocator { std::size_t space = base_ + size_ - ptr_; if (std::align(16, n, ptr, space)) { - ERROR("success n={}, total={}, count={}", n, total_, count_); + MMDEPLOY_ERROR("success n={}, total={}, count={}", n, total_, count_); ptr_ = static_cast(ptr) + n; return ptr; } - ERROR("fallback {}, total={}, count={}", n, total_, count_); + MMDEPLOY_ERROR("fallback {}, total={}, count={}", n, total_, count_); return gDefaultAllocator().Allocate(n); } void Deallocate(void* _p, std::size_t n) { @@ -43,7 +43,7 @@ class LinearAllocator { } total_ -= n; --count_; - ERROR("deallocate total={}, count={}", total_, count_); + MMDEPLOY_ERROR("deallocate total={}, count={}", total_, count_); if (total_ == 0) { assert(count_ == 0); ptr_ = base_; diff --git a/csrc/device/device_allocator.h b/csrc/device/device_allocator.h index 4539e12d4e..06bb5730df 100644 --- a/csrc/device/device_allocator.h +++ b/csrc/device/device_allocator.h @@ -162,12 +162,14 @@ class Stats : public AllocatorImpl { : allocator_(std::move(allocator)), name_(std::move(name)) {} ~Stats() override { - INFO("=== {} ===", name_); - INFO(" Allocation: count={}, size={}MB, time={}ms", data_.allocation_count, - 
data_.allocated_bytes / (1024 * 1024.f), static_cast(data_.allocation_time)); - INFO("Deallocation: count={}, size={}MB, time={}ms", data_.deallocation_count, - data_.deallocated_bytes / (1024 * 1024.f), static_cast(data_.deallocation_time)); - INFO("Peak memory usage: size={}MB", data_.peak / (1024 * 1024.f)); + MMDEPLOY_INFO("=== {} ===", name_); + MMDEPLOY_INFO(" Allocation: count={}, size={}MB, time={}ms", data_.allocation_count, + data_.allocated_bytes / (1024 * 1024.f), + static_cast(data_.allocation_time)); + MMDEPLOY_INFO("Deallocation: count={}, size={}MB, time={}ms", data_.deallocation_count, + data_.deallocated_bytes / (1024 * 1024.f), + static_cast(data_.deallocation_time)); + MMDEPLOY_INFO("Peak memory usage: size={}MB", data_.peak / (1024 * 1024.f)); } Block Allocate(size_t size) noexcept override { @@ -281,10 +283,10 @@ class Bucketizer : public AllocatorImpl { Bucketizer(const AllocatorCreator& creator, size_t min_size, size_t max_size, size_t step_size) : min_size_(min_size), max_size_(max_size), step_size_(step_size) { for (auto base = min_size_; base < max_size_; base += step_size_) { - // ERROR("{}, {}", base, base + step_size - 1); + // MMDEPLOY_ERROR("{}, {}", base, base + step_size - 1); allocator_.push_back(creator(base, base + step_size - 1)); } - // ERROR("{}", allocator_.size()); + // MMDEPLOY_ERROR("{}", allocator_.size()); } Block Allocate(size_t size) noexcept override { diff --git a/csrc/experimental/collection.h b/csrc/experimental/collection.h index e26be6da99..a65de2ec65 100644 --- a/csrc/experimental/collection.h +++ b/csrc/experimental/collection.h @@ -1,93 +1,93 @@ -// Copyright (c) OpenMMLab. All rights reserved. - -#ifndef MMDEPLOY_SRC_EXPERIMENTAL_COLLECTION_H_ -#define MMDEPLOY_SRC_EXPERIMENTAL_COLLECTION_H_ - -#include "token.h" - -namespace mmdeploy { - -class Collection { - public: - template - friend Collection& operator<<(Collection& c, const Token& value) { - c.put(value); - return c; - } - - template - friend const Collection& operator>>(const Collection& c, Token& value) { - c.get(value); - return c; - } - - template - Result maybe() const { - T token; - if (get(token)) { - return token; - } - return Status(eFail); - } - - private: - std::vector keys_; - std::vector> values_; - - template - void put(const Token& value) { - keys_.push_back(Token::key()); - values_.push_back(std::make_shared>(value)); - } - - template - bool get(Token& value) const { - for (int i = 0; i < keys_.size(); ++i) { - if (keys_[i] == Token::key()) { - value = *static_cast*>(values_[i].get()); - return true; - } - } - return false; - } -}; - -namespace detail { - -template -struct function_traits { - template - static std::tuple get_args(std::function); - - template - static R get_ret(std::function); - - using args_t = decltype(get_args(std::function{std::declval()})); - using ret_t = decltype(get_ret(std::function{std::declval()})); -}; - -// TODO: obtain first error -// TODO: combine all errors -template > -Result Apply(F&& f, const Result&... args) { - if ((... && args)) { - return std::invoke(std::forward(f), args.value()...); - } - return Status(eFail); -} - -template > -Result ApplyImpl(F&& f, const Collection& c, std::tuple*) { - return Apply(std::forward(f), c.maybe()...); -} - -} // namespace detail - -template ::args_t> -decltype(auto) Apply(F&& f, const Collection& c) { - return detail::ApplyImpl(std::forward(f), c, std::add_pointer_t{}); -} - -} // namespace mmdeploy - -#endif // MMDEPLOY_SRC_EXPERIMENTAL_COLLECTION_H_ +//// Copyright (c) OpenMMLab. 
All rights reserved. +// +//#ifndef MMDEPLOY_SRC_EXPERIMENTAL_COLLECTION_H_ +//#define MMDEPLOY_SRC_EXPERIMENTAL_COLLECTION_H_ +// +//#include "token.h" +// +// namespace mmdeploy { +// +// class Collection { +// public: +// template +// friend Collection& operator<<(Collection& c, const Token& value) { +// c.put(value); +// return c; +// } +// +// template +// friend const Collection& operator>>(const Collection& c, Token& value) { +// c.get(value); +// return c; +// } +// +// template +// Result maybe() const { +// T token; +// if (get(token)) { +// return token; +// } +// return Status(eFail); +// } +// +// private: +// std::vector keys_; +// std::vector> values_; +// +// template +// void put(const Token& value) { +// keys_.push_back(Token::key()); +// values_.push_back(std::make_shared>(value)); +// } +// +// template +// bool get(Token& value) const { +// for (int i = 0; i < keys_.size(); ++i) { +// if (keys_[i] == Token::key()) { +// value = *static_cast*>(values_[i].get()); +// return true; +// } +// } +// return false; +// } +//}; +// +// namespace detail { +// +// template +// struct function_traits { +// template +// static std::tuple get_args(std::function); +// +// template +// static R get_ret(std::function); +// +// using args_t = decltype(get_args(std::function{std::declval()})); +// using ret_t = decltype(get_ret(std::function{std::declval()})); +//}; +// +//// TODO: obtain first error +//// TODO: combine all errors +// template > +// Result Apply(F&& f, const Result&... args) { +// if ((... && args)) { +// return std::invoke(std::forward(f), args.value()...); +// } +// return Status(eFail); +// } +// +// template > +// Result ApplyImpl(F&& f, const Collection& c, std::tuple*) { +// return Apply(std::forward(f), c.maybe()...); +// } +// +// } // namespace detail +// +// template ::args_t> +// decltype(auto) Apply(F&& f, const Collection& c) { +// return detail::ApplyImpl(std::forward(f), c, std::add_pointer_t{}); +// } +// +// } // namespace mmdeploy +// +//#endif // MMDEPLOY_SRC_EXPERIMENTAL_COLLECTION_H_ diff --git a/csrc/experimental/module_adapter.h b/csrc/experimental/module_adapter.h index 8d652cfb4f..581e7f2bea 100644 --- a/csrc/experimental/module_adapter.h +++ b/csrc/experimental/module_adapter.h @@ -31,7 +31,7 @@ struct InvokeImpl { std::forward(ts)...); return make_ret_val(std::move(ret)); } catch (const std::exception& e) { - ERROR("unhandled exception: {}", e.what()); + MMDEPLOY_ERROR("unhandled exception: {}", e.what()); return Status(eFail); } catch (...) { return Status(eFail); diff --git a/csrc/experimental/token.h b/csrc/experimental/token.h index e1c951352e..6d6ae7f884 100644 --- a/csrc/experimental/token.h +++ b/csrc/experimental/token.h @@ -1,72 +1,72 @@ -// Copyright (c) OpenMMLab. All rights reserved. - -#ifndef MMDEPLOY_SRC_TOKEN_TOKEN_H_ -#define MMDEPLOY_SRC_TOKEN_TOKEN_H_ - -#include -#include -#include -#include -#include -#include - -#include "core/status_code.h" - -namespace mmdeploy { - -namespace token { - -template -using String = std::integer_sequence; - -// this is a GCC only extension -template -constexpr String operator""_ts() { - return {}; -} - -template -const char* c_str(String) { - static constexpr const char str[sizeof...(cs) + 1] = {cs..., '\0'}; - return str; -} - -} // namespace token - -// template -// static void* signature() { -// static char id = 0; -// return &id; +//// Copyright (c) OpenMMLab. All rights reserved. 
+// +//#ifndef MMDEPLOY_SRC_TOKEN_TOKEN_H_ +//#define MMDEPLOY_SRC_TOKEN_TOKEN_H_ +// +//#include +//#include +//#include +//#include +//#include +//#include +// +//#include "core/status_code.h" +// +// namespace mmdeploy { +// +// namespace token { +// +// template +// using String = std::integer_sequence; +// +//// this is a GCC only extension +// template +// constexpr String operator""_ts() { +// return {}; // } // -// using signature_t = decltype(signature()); - -template -struct Token { - using signature_t = void*; - using value_type = T; - - Token(T value = {}) : value_(value) {} // NOLINT - - operator T() const { return value_; } // NOLINT - static const char* key() { return token::c_str(Key{}); } - - T& operator*() { return value_; } - T* operator->() { return &value_; } - - private: - T value_; -}; - -template -class Identifier { - public: - constexpr explicit Identifier(const char* key) : key_(key) {} - const char* key_; -}; - -constexpr inline Identifier batch_size{"batch_size"}; - -} // namespace mmdeploy - -#endif // MMDEPLOY_SRC_TOKEN_TOKEN_H_ +// template +// const char* c_str(String) { +// static constexpr const char str[sizeof...(cs) + 1] = {cs..., '\0'}; +// return str; +// } +// +// } // namespace token +// +//// template +//// static void* signature() { +//// static char id = 0; +//// return &id; +//// } +//// +//// using signature_t = decltype(signature()); +// +// template +// struct Token { +// using signature_t = void*; +// using value_type = T; +// +// Token(T value = {}) : value_(value) {} // NOLINT +// +// operator T() const { return value_; } // NOLINT +// static const char* key() { return token::c_str(Key{}); } +// +// T& operator*() { return value_; } +// T* operator->() { return &value_; } +// +// private: +// T value_; +//}; +// +// template +// class Identifier { +// public: +// constexpr explicit Identifier(const char* key) : key_(key) {} +// const char* key_; +//}; +// +// constexpr inline Identifier batch_size{"batch_size"}; +// +//} // namespace mmdeploy +// +//#endif // MMDEPLOY_SRC_TOKEN_TOKEN_H_ diff --git a/csrc/graph/CMakeLists.txt b/csrc/graph/CMakeLists.txt index a0c09946e1..e39fbcf5c2 100644 --- a/csrc/graph/CMakeLists.txt +++ b/csrc/graph/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_graph) -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) set(SRCS common.cpp inference.cpp @@ -10,7 +10,5 @@ set(SRCS task.cpp flatten.cpp unflatten.cpp) -build_target(${PROJECT_NAME} "${SRCS}") -target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy::core) +mmdeploy_add_module(${PROJECT_NAME} "${SRCS}") add_library(mmdeploy::graph ALIAS ${PROJECT_NAME}) -export_module(${PROJECT_NAME}) diff --git a/csrc/graph/common.cpp b/csrc/graph/common.cpp index 7881b91f81..fba4b23619 100644 --- a/csrc/graph/common.cpp +++ b/csrc/graph/common.cpp @@ -10,7 +10,7 @@ mmdeploy::graph::BaseNode::BaseNode(const mmdeploy::Value& cfg) { from_value(cfg["output"], outputs_); name_ = cfg.value("name", ""); } catch (...) 
{ - ERROR("error parsing config: {}", cfg); + MMDEPLOY_ERROR("error parsing config: {}", cfg); throw; } } diff --git a/csrc/graph/common.h b/csrc/graph/common.h index 95e8c2d08c..a94d8c5fff 100644 --- a/csrc/graph/common.h +++ b/csrc/graph/common.h @@ -12,15 +12,16 @@ namespace mmdeploy::graph { template ::ReturnType> inline Result CreateFromRegistry(const Value& config, const char* key = "type") { - INFO("config: {}", config); + MMDEPLOY_INFO("config: {}", config); auto type = config[key].get(); auto creator = Registry::Get().GetCreator(type); if (!creator) { + MMDEPLOY_ERROR("failed to find module creator: {}", type); return Status(eEntryNotFound); } auto inst = creator->Create(config); if (!inst) { - ERROR("failed to create module: {}", type); + MMDEPLOY_ERROR("failed to create module: {}", type); return Status(eFail); } return std::move(inst); diff --git a/csrc/graph/flatten.cpp b/csrc/graph/flatten.cpp index 3153e586b5..d180470eb6 100644 --- a/csrc/graph/flatten.cpp +++ b/csrc/graph/flatten.cpp @@ -20,7 +20,7 @@ void FlattenNode::Build(TaskGraph& graph) { if (idxs.empty()) { idxs = std::move(idx); } else if (idx != idxs) { - ERROR("args does not have same structure"); + MMDEPLOY_ERROR("args does not have same structure"); return Status(eInvalidArgument); } rets.push_back(std::move(ret)); diff --git a/csrc/graph/inference.cpp b/csrc/graph/inference.cpp index 34d9c8a374..15c11d581a 100644 --- a/csrc/graph/inference.cpp +++ b/csrc/graph/inference.cpp @@ -17,7 +17,7 @@ Inference::Inference(const Value& cfg) : BaseNode(cfg) { auto model_path = model_value.get(); model_ = Model(model_path); } else { - ERROR("unsupported model specification"); + MMDEPLOY_ERROR("unsupported model specification"); throw_exception(eInvalidArgument); } @@ -31,7 +31,7 @@ Inference::Inference(const Value& cfg) : BaseNode(cfg) { value["context"] = context; pipeline_ = std::make_unique(value); if (!pipeline_) { - ERROR("failed to create pipeline"); + MMDEPLOY_ERROR("failed to create pipeline"); throw_exception(eFail); } } diff --git a/csrc/graph/pipeline.cpp b/csrc/graph/pipeline.cpp index ef77a242be..4d668c249a 100644 --- a/csrc/graph/pipeline.cpp +++ b/csrc/graph/pipeline.cpp @@ -21,7 +21,7 @@ Pipeline::Pipeline(const Value& cfg) : BaseNode(cfg["pipeline"]) { node_input_idx_.push_back(UpdateBindings(nodes_.back()->inputs(), kRead)); node_output_idx_.push_back(UpdateBindings(nodes_.back()->outputs(), kWrite)); } else { - ERROR("could not create {}:{}", name, type); + MMDEPLOY_ERROR("could not create {}:{}", name, type); throw_exception(eFail); } } @@ -57,7 +57,7 @@ std::vector Pipeline::UpdateBindings(const vector& names, Bind auto it = binding_name_to_idx_.lower_bound(name); if (it == binding_name_to_idx_.end() || it->first != name) { if (type == kRead) { - ERROR("unknown binding name: {}", name); + MMDEPLOY_ERROR("unknown binding name: {}", name); throw_exception(eEntryNotFound); } else { auto index = static_cast(binding_name_to_idx_.size()); diff --git a/csrc/graph/task.cpp b/csrc/graph/task.cpp index 0791a84a04..ee1f0ebf84 100644 --- a/csrc/graph/task.cpp +++ b/csrc/graph/task.cpp @@ -36,7 +36,7 @@ static size_t GetBatchSize(const Value& args) { Task::Task(const Value& cfg) : BaseNode(cfg) { auto module = CreateFromRegistry(cfg, "module"); if (!module) { - ERROR("failed to create task: {}", cfg); + MMDEPLOY_ERROR("failed to create task: {}", cfg); throw_exception(eFail); } module_ = std::move(module).value(); @@ -50,7 +50,8 @@ void Task::Build(TaskGraph& graph) { auto args = ctx.pop().array(); auto rets = 
Value::Array{}; auto batch_size = GetBatchSize(args); - // ERROR("name: {}, is_batched: {}, INPUT batch_size: {}", name_, is_batched_, batch_size); + // MMDEPLOY_ERROR("name: {}, is_batched: {}, INPUT batch_size: {}", name_, is_batched_, + // batch_size); if (!is_batched_ && batch_size) { rets.resize(outputs_.size(), Value::kArray); if (!is_thread_safe_) { @@ -86,7 +87,7 @@ void Task::Build(TaskGraph& graph) { rets = std::move(tmp).array(); } ctx.push(std::move(rets)); - // ERROR("name: {}, is_batched: {}, OUTPUT batch_size: {}", name_, is_batched_, + // MMDEPLOY_ERROR("name: {}, is_batched: {}, OUTPUT batch_size: {}", name_, is_batched_, // GetBatchSize(rets)); return success(); }); diff --git a/csrc/model/CMakeLists.txt b/csrc/model/CMakeLists.txt index fbeaaf6050..ebfbf2167e 100644 --- a/csrc/model/CMakeLists.txt +++ b/csrc/model/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.14) project(model) -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) set(MODEL_NAMES "directory_model") if (${MMDEPLOY_ZIP_MODEL}) @@ -11,18 +11,17 @@ endif () foreach (MODEL_NAME ${MODEL_NAMES}) set(TARGET_MODEL_NAME mmdeploy_${MODEL_NAME}) - build_target(${TARGET_MODEL_NAME} ${MODEL_NAME}_impl.cpp) - target_link_libraries(${TARGET_MODEL_NAME} - PRIVATE mmdeploy::core - PUBLIC stdc++fs) + mmdeploy_add_module(${TARGET_MODEL_NAME} ${MODEL_NAME}_impl.cpp) + if (NOT MSVC) + target_link_libraries(${TARGET_MODEL_NAME} PUBLIC stdc++fs) + endif () if (${MODEL_NAME} STREQUAL "zip_model") find_package(libzip QUIET) if (libzip_FOUND) target_link_libraries(${TARGET_MODEL_NAME} PUBLIC libzip::zip) - else() + else () target_link_libraries(${TARGET_MODEL_NAME} PUBLIC zip) - endif() + endif () endif () add_library(mmdeploy::${MODEL_NAME} ALIAS ${TARGET_MODEL_NAME}) - export_module(${TARGET_MODEL_NAME}) endforeach () diff --git a/csrc/model/directory_model_impl.cpp b/csrc/model/directory_model_impl.cpp index 2de9d5ec23..202eafb12f 100644 --- a/csrc/model/directory_model_impl.cpp +++ b/csrc/model/directory_model_impl.cpp @@ -5,17 +5,10 @@ #include "archive/json_archive.h" #include "core/model.h" #include "core/model_impl.h" +#include "core/utils/filesystem.h" using nlohmann::json; -#if __GNUC__ >= 8 -#include -namespace fs = std::filesystem; -#else -#include -namespace fs = std::experimental::filesystem; -#endif - namespace mmdeploy { class DirectoryModelImpl : public ModelImpl { @@ -52,7 +45,7 @@ class DirectoryModelImpl : public ModelImpl { from_json(json::parse(deploy_json), meta); return meta; } catch (std::exception& e) { - ERROR("exception happened: {}", e.what()); + MMDEPLOY_ERROR("exception happened: {}", e.what()); return Status(eFail); } } diff --git a/csrc/model/zip_model_impl.cpp b/csrc/model/zip_model_impl.cpp index 0f1479f64c..54545860f8 100644 --- a/csrc/model/zip_model_impl.cpp +++ b/csrc/model/zip_model_impl.cpp @@ -7,14 +7,8 @@ #include "core/logger.h" #include "core/model.h" #include "core/model_impl.h" +#include "core/utils/filesystem.h" #include "zip.h" -#if __GNUC__ >= 8 -#include -namespace fs = std::filesystem; -#else -#include -namespace fs = std::experimental::filesystem; -#endif using nlohmann::json; @@ -40,10 +34,10 @@ class ZipModelImpl : public ModelImpl { int ret = 0; zip_ = zip_open(model_path.c_str(), 0, &ret); if (ret != 0) { - INFO("open zip file {} failed, ret {}", model_path.c_str(), ret); + MMDEPLOY_INFO("open zip file {} failed, ret {}", model_path.c_str(), ret); return Status(eInvalidArgument); } - INFO("open sdk model file {} 
successfully", model_path.c_str()); + MMDEPLOY_INFO("open sdk model file {} successfully", model_path.c_str()); return InitZip(); } @@ -70,24 +64,25 @@ class ZipModelImpl : public ModelImpl { auto iter = file_index_.find(file_path); if (iter == file_index_.end()) { - ERROR("cannot find file {} under dir {}", file_path.c_str(), root_dir_.c_str()); + MMDEPLOY_ERROR("cannot find file {} under dir {}", file_path.c_str(), root_dir_.c_str()); return Status(eFail); } index = iter->second; struct zip_file* pzip = zip_fopen_index(zip_, index, 0); if (nullptr == pzip) { - ERROR("read file {} in zip file failed, whose index is {}", file_path.c_str(), index); + MMDEPLOY_ERROR("read file {} in zip file failed, whose index is {}", file_path.c_str(), + index); return Status(eFail); } struct zip_stat stat {}; if ((ret = zip_stat_index(zip_, index, 0, &stat)) < 0) { - ERROR("get stat of file {} error, ret {}", file_path.c_str(), ret); + MMDEPLOY_ERROR("get stat of file {} error, ret {}", file_path.c_str(), ret); return Status(eFail); } - DEBUG("file size {}", (int)stat.size); + MMDEPLOY_DEBUG("file size {}", (int)stat.size); std::vector buf(stat.size); if ((ret = zip_fread(pzip, buf.data(), stat.size)) < 0) { - ERROR("read data of file {} error, ret {}", file_path.c_str(), ret); + MMDEPLOY_ERROR("read data of file {} error, ret {}", file_path.c_str(), ret); return Status(eFail); } return std::string(buf.begin(), buf.end()); @@ -100,7 +95,7 @@ class ZipModelImpl : public ModelImpl { from_json(json::parse(deploy_json), meta); return meta; } catch (std::exception& e) { - ERROR("exception happened: {}", e.what()); + MMDEPLOY_ERROR("exception happened: {}", e.what()); return Status(eFail); } } @@ -108,7 +103,7 @@ class ZipModelImpl : public ModelImpl { private: Result InitZip() { int files = zip_get_num_files(zip_); - INFO("there are {} files in sdk model file", files); + MMDEPLOY_INFO("there are {} files in sdk model file", files); if (files == 0) { return Status(eFail); } @@ -119,9 +114,9 @@ class ZipModelImpl : public ModelImpl { fs::path path(stat.name); auto file_name = path.filename().string(); if (file_name == ".") { - DEBUG("{}-th file name is: {}, which is a directory", i, stat.name); + MMDEPLOY_DEBUG("{}-th file name is: {}, which is a directory", i, stat.name); } else { - DEBUG("{}-th file name is: {}, which is a file", i, stat.name); + MMDEPLOY_DEBUG("{}-th file name is: {}, which is a file", i, stat.name); file_index_[file_name] = i; } } diff --git a/csrc/net/CMakeLists.txt b/csrc/net/CMakeLists.txt index f339801c04..f411abe5e4 100644 --- a/csrc/net/CMakeLists.txt +++ b/csrc/net/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_net_module) -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) if ("trt" IN_LIST MMDEPLOY_TARGET_BACKENDS) add_subdirectory(trt) @@ -24,7 +24,5 @@ if ("openvino" IN_LIST MMDEPLOY_TARGET_BACKENDS) add_subdirectory(openvino) endif () -build_target(${PROJECT_NAME} net_module.cpp) -target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy::core) +mmdeploy_add_module(${PROJECT_NAME} net_module.cpp) add_library(mmdeploy::net_module ALIAS ${PROJECT_NAME}) -export_module(${PROJECT_NAME}) diff --git a/csrc/net/ncnn/CMakeLists.txt b/csrc/net/ncnn/CMakeLists.txt index 5e83abbf53..3c0e1ff6e7 100644 --- a/csrc/net/ncnn/CMakeLists.txt +++ b/csrc/net/ncnn/CMakeLists.txt @@ -2,19 +2,17 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_ncnn_net) -if("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES) - 
include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) +if ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES) + include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) - find_package(ncnn REQUIRED) - add_library(${PROJECT_NAME} SHARED ncnn_net.cpp) - target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy::core ncnn) - target_link_libraries( - ${PROJECT_NAME} PRIVATE -Wl,--whole-archive mmdeploy::ncnn_ops::static - -Wl,--no-whole-archive) - add_library(mmdeploy::ncnn_net ALIAS ${PROJECT_NAME}) - export_module(${PROJECT_NAME}) -else() - message( - ERROR - "'ncnn_net' is NOT supported in target devices: ${MMDEPLOY_TARGET_DEVICES}") -endif() + find_package(ncnn REQUIRED) + + mmdeploy_add_module(${PROJECT_NAME} ncnn_net.cpp) + target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy_ncnn_ops_obj) + target_link_libraries(${PROJECT_NAME} PRIVATE ncnn) + add_library(mmdeploy::ncnn_net ALIAS ${PROJECT_NAME}) +else () + message( + ERROR + "'ncnn_net' is NOT supported in target devices: ${MMDEPLOY_TARGET_DEVICES}") +endif () diff --git a/csrc/net/ncnn/ncnn_net.cpp b/csrc/net/ncnn/ncnn_net.cpp index 29b37242e3..1a958ccfe8 100644 --- a/csrc/net/ncnn/ncnn_net.cpp +++ b/csrc/net/ncnn/ncnn_net.cpp @@ -5,6 +5,7 @@ #include "core/logger.h" #include "core/model.h" #include "core/utils/formatter.h" +#include "ncnn_ops_register.h" namespace mmdeploy { @@ -33,27 +34,27 @@ Result NCNNNet::Init(const Value& args) { OUTCOME_TRY(params_, model.ReadFile(config.net)); OUTCOME_TRY(weights_, model.ReadFile(config.weights)); + register_mmdeploy_custom_layers(net_); + OUTCOME_TRY(ncnn_status(net_.load_param_mem(params_.c_str()))); net_.load_model(reinterpret_cast(weights_.data())); input_indices_ = net_.input_indexes(); for (const auto& x : net_.input_names()) { - // input_names_.emplace_back(x); input_tensors_.emplace_back(TensorDesc{ - .device = Device("cpu"), - .data_type = DataType::kFLOAT, - .shape = {}, - .name = x, + Device("cpu"), + DataType::kFLOAT, + {}, + x, }); } output_indices_ = net_.output_indexes(); for (const auto& x : net_.output_names()) { - // output_names_.emplace_back(x); output_tensors_.emplace_back(TensorDesc{ - .device = Device("cpu"), - .data_type = DataType::kFLOAT, - .shape = {}, - .name = x, + Device("cpu"), + DataType::kFLOAT, + {}, + x, }); } @@ -107,7 +108,7 @@ class NCNNNetCreator : public Creator { if (auto r = p->Init(args)) { return p; } else { - ERROR("error creating NCNNNet: {}", r.error().message().c_str()); + MMDEPLOY_ERROR("error creating NCNNNet: {}", r.error().message().c_str()); return nullptr; } } diff --git a/csrc/net/net_module.cpp b/csrc/net/net_module.cpp index 7ebd78df94..1216d4d7ee 100644 --- a/csrc/net/net_module.cpp +++ b/csrc/net/net_module.cpp @@ -24,7 +24,7 @@ struct NetModule::Impl { using Output = std::map; explicit Impl(const Value& args) { - DEBUG("Net Module cfg: {}", args); + MMDEPLOY_DEBUG("Net Module cfg: {}", args); auto init = [&]() -> Result { auto name = args["name"].get(); auto& context = args["context"]; @@ -34,7 +34,7 @@ struct NetModule::Impl { stream_ = context.value("stream", Stream::GetDefault(device_)); auto creator = Registry::Get().GetCreator(config.backend); if (!creator) { - ERROR("Net backend not found: {}", config.backend); + MMDEPLOY_ERROR("Net backend not found: {}", config.backend); return Status(eEntryNotFound); } auto net_cfg = args; @@ -82,13 +82,13 @@ struct NetModule::Impl { return shape; } if (shape[0] != 1) { - ERROR("unsupported shape for batch assemble: {}", shape); + MMDEPLOY_ERROR("unsupported shape for batch assemble: {}", shape); return 
Status(eNotSupported); } for (int i = 1; i < input.size(); ++i) { auto& sample = input[i]; if (sample.shape() != shape) { - ERROR("shapes are not consistent across the batch"); + MMDEPLOY_ERROR("shapes are not consistent across the batch"); return Status(eNotSupported); } } @@ -122,7 +122,7 @@ struct NetModule::Impl { if (auto it = sample.find(name); it != sample.end()) { tmp.push_back(it->second); } else { - ERROR("sample {} missing key {}", i, name); + MMDEPLOY_ERROR("sample {} missing key {}", i, name); return Status(eInvalidArgument); } } @@ -140,7 +140,7 @@ struct NetModule::Impl { auto& src = input_samples[i]; auto& dst = inputs_[i]; if (dst.shape() != input_shapes[i]) { - ERROR("inconsistent input shape, expect {}, got {}", input_shapes[i], dst.shape()); + MMDEPLOY_ERROR("inconsistent input shape, expect {}, got {}", input_shapes[i], dst.shape()); return Status(eFail); } if (src.size() > 1) { @@ -165,7 +165,7 @@ struct NetModule::Impl { if (tmp.size()) { OUTCOME_TRY(t.CopyTo(tmp, stream_)); } else { - WARN("copy skipped due to zero sized tensor"); + MMDEPLOY_WARN("copy skipped due to zero sized tensor"); } if (output.size() > 1) { for (int i = 0; i < output.size(); ++i) { diff --git a/csrc/net/openvino/CMakeLists.txt b/csrc/net/openvino/CMakeLists.txt index 14542aa94c..6963739e21 100644 --- a/csrc/net/openvino/CMakeLists.txt +++ b/csrc/net/openvino/CMakeLists.txt @@ -3,15 +3,13 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_openvino_net) if ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES) - include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) + include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) find_package(InferenceEngine REQUIRED) - add_library(${PROJECT_NAME} SHARED openvino_net.cpp) + mmdeploy_add_module(${PROJECT_NAME} openvino_net.cpp) target_link_libraries(${PROJECT_NAME} PRIVATE - mmdeploy::core ${InferenceEngine_LIBRARIES}) add_library(mmdeploy::openvino_net ALIAS ${PROJECT_NAME}) - export_module(${PROJECT_NAME}) else () message(ERROR "'openvino_net' is NOT supported in target devices: ${MMDEPLOY_TARGET_DEVICES}") endif () diff --git a/csrc/net/openvino/openvino_net.cpp b/csrc/net/openvino/openvino_net.cpp index af20899a6b..1176967c9a 100644 --- a/csrc/net/openvino/openvino_net.cpp +++ b/csrc/net/openvino/openvino_net.cpp @@ -3,17 +3,11 @@ #include -#if __GNUC__ >= 8 -#include -namespace fs = std::filesystem; -#else -#include -namespace fs = std::experimental::filesystem; -#endif #include #include "core/logger.h" #include "core/model.h" +#include "core/utils/filesystem.h" #include "core/utils/formatter.h" namespace mmdeploy { @@ -40,7 +34,7 @@ static Result ConvertElementType(InferenceEngine::Precision prec) { case InferenceEngine::Precision::ePrecision::I64: return DataType::kINT64; default: - ERROR("unsupported InferenceEngine Precision: {}", static_cast(type)); + MMDEPLOY_ERROR("unsupported InferenceEngine Precision: {}", static_cast(type)); return Status(eNotSupported); } } @@ -58,7 +52,7 @@ static Result ConvertPrecision(DataType case DataType::kINT64: return InferenceEngine::Precision::ePrecision::I64; default: - ERROR("unsupported DataType: {}", static_cast(type)); + MMDEPLOY_ERROR("unsupported DataType: {}", static_cast(type)); return Status(eNotSupported); } } @@ -99,7 +93,7 @@ Result OpenVINONet::Init(const Value& args) { bin_out << raw_bin; bin_out.close(); } catch (const std::exception& e) { - ERROR("unhandled exception when creating tmp xml/bin: {}", e.what()); + MMDEPLOY_ERROR("unhandled exception when creating tmp xml/bin: {}", e.what()); return Status(eFail); } 
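The `TensorDesc` rewrites in the hunks below, like the matching ncnn_net.cpp change above and the ort_net.cpp/trt_net.cpp changes further down, replace designated initializers with positional aggregate initialization. Designated initializers are a C++20 feature that GCC tolerates in C++17 mode but MSVC rejects, so the portable form is used. A minimal, self-contained illustration; the stub types here merely stand in for mmdeploy's real ones:

```cpp
#include <cstdint>
#include <string>
#include <vector>

// Stand-ins for mmdeploy's real types, just to keep the example compilable.
struct Device {
  Device() = default;
  explicit Device(const char*) {}
};
enum class DataType { kFLOAT };
using TensorShape = std::vector<int64_t>;

struct TensorDesc {
  Device device;
  DataType data_type;
  TensorShape shape;
  std::string name;
};

int main() {
  // Before: designated initializers, which MSVC rejects under /std:c++17.
  //   TensorDesc desc{.device = Device("cpu"), .data_type = DataType::kFLOAT,
  //                   .shape = {}, .name = "input"};

  // After: positional aggregate initialization, as used throughout this
  // patch; correctness now depends on the member declaration order above.
  TensorDesc desc{Device("cpu"), DataType::kFLOAT, {}, "input"};
  (void)desc;
}
```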
@@ -116,8 +110,7 @@ Result OpenVINONet::Init(const Value& args) { OUTCOME_TRY(auto data_type, ConvertElementType(input_data->getPrecision())); const auto& size_vector = input_data->getTensorDesc().getDims(); TensorShape shape{size_vector.begin(), size_vector.end()}; - input_tensors_.emplace_back(TensorDesc{ - .device = device_, .data_type = data_type, .shape = shape, .name = input_name}); + input_tensors_.emplace_back(TensorDesc{device_, data_type, shape, input_name}); } // set output tensor @@ -128,8 +121,7 @@ Result OpenVINONet::Init(const Value& args) { OUTCOME_TRY(auto data_type, ConvertElementType(output_data->getPrecision())); const auto& size_vector = output_data->getDims(); TensorShape shape{size_vector.begin(), size_vector.end()}; - output_tensors_.emplace_back(TensorDesc{ - .device = device_, .data_type = data_type, .shape = shape, .name = output_name}); + output_tensors_.emplace_back(TensorDesc{device_, data_type, shape, output_name}); } // create request @@ -141,7 +133,7 @@ Result OpenVINONet::Init(const Value& args) { request_ = executable_network.CreateInferRequest(); } catch (const std::exception& e) { - ERROR("unhandled exception when creating OpenVINO: {}", e.what()); + MMDEPLOY_ERROR("unhandled exception when creating OpenVINO: {}", e.what()); return Status(eFail); } return success(); @@ -190,7 +182,7 @@ static Result SetBlob(InferenceEngine::InferRequest& request, Tensor& tens InferenceEngine::make_shared_blob(ie_desc, tensor.data())); break; default: - ERROR("unsupported DataType: {}", static_cast(desc.data_type)); + MMDEPLOY_ERROR("unsupported DataType: {}", static_cast(desc.data_type)); return Status(eNotSupported); } return success(); @@ -211,9 +203,7 @@ static Result GetBlob(InferenceEngine::InferRequest& request, Tensor& tens auto moutputHolder = moutput->rmap(); std::shared_ptr data(const_cast(moutputHolder.as()), [](void*) {}); - Tensor blob_tensor = { - TensorDesc{.device = device, .data_type = data_type, .shape = shape, .name = output_name}, - data}; + Tensor blob_tensor = {TensorDesc{device, data_type, shape, output_name}, data}; if (!std::equal(blob_tensor.shape().begin(), blob_tensor.shape().end(), tensor.shape().begin())) tensor.Reshape(shape); OUTCOME_TRY(tensor.CopyFrom(blob_tensor, stream)); @@ -272,11 +262,11 @@ class OpenVINONetCreator : public Creator { if (auto r = p->Init(args)) { return p; } else { - ERROR("error creating OpenVINONet: {}", r.error().message().c_str()); + MMDEPLOY_ERROR("error creating OpenVINONet: {}", r.error().message().c_str()); return nullptr; } } catch (const std::exception& e) { - ERROR("unhandled exception when creating OpenVINONet: {}", e.what()); + MMDEPLOY_ERROR("unhandled exception when creating OpenVINONet: {}", e.what()); return nullptr; } } diff --git a/csrc/net/ort/CMakeLists.txt b/csrc/net/ort/CMakeLists.txt index 4b7af7aa52..b4b78eff47 100644 --- a/csrc/net/ort/CMakeLists.txt +++ b/csrc/net/ort/CMakeLists.txt @@ -3,18 +3,12 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_ort_net) if ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES) - include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) - add_library(${PROJECT_NAME} SHARED ort_net.cpp) - target_include_directories(${PROJECT_NAME} PUBLIC ${ONNXRUNTIME_DIR}/include) + include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) + mmdeploy_add_module(${PROJECT_NAME} ort_net.cpp) + target_include_directories(${PROJECT_NAME} PRIVATE ${ONNXRUNTIME_DIR}/include) target_link_directories(${PROJECT_NAME} PUBLIC ${ONNXRUNTIME_DIR}/lib) - target_link_libraries(${PROJECT_NAME} PRIVATE 
mmdeploy::core - PUBLIC onnxruntime) - target_link_libraries(${PROJECT_NAME} PRIVATE - -Wl,--whole-archive - mmdeploy::onnxruntime::ops::static - -Wl,--no-whole-archive) + target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy_onnxruntime_ops_obj) add_library(mmdeploy::ort_net ALIAS ${PROJECT_NAME}) - export_module(${PROJECT_NAME}) else () message(ERROR "'ort_net' is NOT supported in target devices: ${MMDEPLOY_TARGET_DEVICES}") endif () diff --git a/csrc/net/ort/ort_net.cpp b/csrc/net/ort/ort_net.cpp index 5a2fb2e637..10ab9f6e1f 100644 --- a/csrc/net/ort/ort_net.cpp +++ b/csrc/net/ort/ort_net.cpp @@ -1,9 +1,13 @@ // Copyright (c) OpenMMLab. All rights reserved. + #include "ort_net.h" +#include + #include "core/logger.h" #include "core/model.h" #include "core/utils/formatter.h" +#include "onnxruntime_register.h" namespace mmdeploy { @@ -25,7 +29,7 @@ static Result ConvertElementType(ONNXTensorElementDataType type) { case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64: return DataType::kINT64; default: - ERROR("unsupported ONNXTensorElementDataType: {}", static_cast(type)); + MMDEPLOY_ERROR("unsupported ONNXTensorElementDataType: {}", static_cast(type)); return Status(eNotSupported); } } @@ -45,6 +49,9 @@ Result OrtNet::Init(const Value& args) { Ort::SessionOptions options; options.SetLogSeverityLevel(3); + + RegisterCustomOps(options, OrtGetApiBase()); + if (device_.is_device()) { OrtCUDAProviderOptions cuda_options{}; cuda_options.device_id = device_.device_id(); @@ -69,12 +76,11 @@ Result OrtNet::Init(const Value& args) { auto input_name = session_.GetInputName(i, allocator); auto type_info = session_.GetInputTypeInfo(i); auto shape = to_shape(type_info); - INFO("input {}, shape = {}", i, shape); + MMDEPLOY_INFO("input {}, shape = {}", i, shape); filter_shape(shape); OUTCOME_TRY(auto data_type, ConvertElementType(type_info.GetTensorTypeAndShapeInfo().GetElementType())); - input_tensors_.emplace_back( - TensorDesc{.device = device_, .data_type = data_type, .shape = shape, .name = input_name}); + input_tensors_.emplace_back(TensorDesc{device_, data_type, shape, input_name}); allocator.Free(input_name); } @@ -84,12 +90,11 @@ Result OrtNet::Init(const Value& args) { auto output_name = session_.GetOutputName(i, allocator); auto type_info = session_.GetOutputTypeInfo(i); auto shape = to_shape(type_info); - INFO("output {}, shape = {}", i, shape); + MMDEPLOY_INFO("output {}, shape = {}", i, shape); filter_shape(shape); OUTCOME_TRY(auto data_type, ConvertElementType(type_info.GetTensorTypeAndShapeInfo().GetElementType())); - output_tensors_.emplace_back( - TensorDesc{.device = device_, .data_type = data_type, .shape = shape, .name = output_name}); + output_tensors_.emplace_back(TensorDesc{device_, data_type, shape, output_name}); allocator.Free(output_name); } @@ -166,7 +171,7 @@ Result OrtNet::Forward() { OUTCOME_TRY(stream_.Wait()); } catch (const std::exception& e) { - ERROR(e.what()); + MMDEPLOY_ERROR(e.what()); return Status(eFail); } return success(); @@ -182,11 +187,11 @@ class OrtNetCreator : public Creator { if (auto r = p->Init(args)) { return p; } else { - ERROR("error creating OrtNet: {}", r.error().message().c_str()); + MMDEPLOY_ERROR("error creating OrtNet: {}", r.error().message().c_str()); return nullptr; } } catch (const std::exception& e) { - ERROR("unhandled exception when creating ORTNet: {}", e.what()); + MMDEPLOY_ERROR("unhandled exception when creating ORTNet: {}", e.what()); return nullptr; } } diff --git a/csrc/net/ppl/CMakeLists.txt b/csrc/net/ppl/CMakeLists.txt index 
cb6c0fb31c..dd859f8e29 100644 --- a/csrc/net/ppl/CMakeLists.txt +++ b/csrc/net/ppl/CMakeLists.txt @@ -2,10 +2,10 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_pplnn_net) -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) find_package(pplnn REQUIRED) -add_library(${PROJECT_NAME} SHARED ppl_net.cpp) +mmdeploy_add_module(${PROJECT_NAME} ppl_net.cpp) target_include_directories(${PROJECT_NAME} PUBLIC $) if ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES) @@ -17,7 +17,5 @@ if ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES) target_link_directories(${PROJECT_NAME} PUBLIC ${CUDA_TOOLKIT_ROOT_DIR}/lib64) endif () target_link_libraries(${PROJECT_NAME} - PRIVATE mmdeploy::core ${PPLNN_LIBRARIES} - PUBLIC nvrtc) + PRIVATE ${PPLNN_LIBRARIES} nvrtc) add_library(mmdeploy::pplnn_net ALIAS ${PROJECT_NAME}) -export_module(${PROJECT_NAME}) diff --git a/csrc/net/ppl/ppl_net.cpp b/csrc/net/ppl/ppl_net.cpp index 72e8f99ac6..f0d4b16030 100644 --- a/csrc/net/ppl/ppl_net.cpp +++ b/csrc/net/ppl/ppl_net.cpp @@ -22,7 +22,7 @@ Result ppl_try(int code) { if (code == 0) { return success(); } - ERROR("ppl error: {}", ppl::common::GetRetCodeStr(code)); + MMDEPLOY_ERROR("ppl error: {}", ppl::common::GetRetCodeStr(code)); return Status(eFail); } @@ -86,9 +86,9 @@ Result PPLNet::Init(const Value& args) { /// debug only auto& desc = inputs_internal_[i]->GetShape(); std::vector shape_(desc.GetDims(), desc.GetDims() + desc.GetDimCount()); - DEBUG("input {}: datatype = {}, dataformat = {}, shape = {}", i, - ppl::common::GetDataTypeStr(desc.GetDataType()), - ppl::common::GetDataFormatStr(desc.GetDataFormat()), shape_); + MMDEPLOY_DEBUG("input {}: datatype = {}, dataformat = {}, shape = {}", i, + ppl::common::GetDataTypeStr(desc.GetDataType()), + ppl::common::GetDataFormatStr(desc.GetDataFormat()), shape_); } for (int i = 0; i < runtime->GetOutputCount(); ++i) { @@ -98,9 +98,9 @@ Result PPLNet::Init(const Value& args) { auto desc = outputs_internal_[i]->GetShape(); std::vector shape_(desc.GetDims(), desc.GetDims() + desc.GetDimCount()); - DEBUG("output {}: datatype = {}, dataformat = {}, shape = {}", i, - ppl::common::GetDataTypeStr(desc.GetDataType()), - ppl::common::GetDataFormatStr(desc.GetDataFormat()), shape_); + MMDEPLOY_DEBUG("output {}: datatype = {}, dataformat = {}, shape = {}", i, + ppl::common::GetDataTypeStr(desc.GetDataType()), + ppl::common::GetDataFormatStr(desc.GetDataFormat()), shape_); TensorShape shape(desc.GetDims(), desc.GetDims() + desc.GetDimCount()); } @@ -176,8 +176,8 @@ Result PPLNet::Forward() { auto& internal = *outputs_internal_[i]; auto format = internal.GetShape().GetDataFormat(); if (format != ppl::common::DATAFORMAT_NDARRAY) { - ERROR("output {}'s format is {}, only NDARRAY is currently supported", i, - ppl::common::GetDataFormatStr(format)); + MMDEPLOY_ERROR("output {}'s format is {}, only NDARRAY is currently supported", i, + ppl::common::GetDataFormatStr(format)); return Status(eNotSupported); } auto& external = outputs_external_[i]; @@ -200,7 +200,8 @@ Result PPLNet::Forward() { if (external.size() > 0) { OUTCOME_TRY(Tensor(external.desc(), data).CopyTo(external, stream_)); } else { - WARN("copy skipped due to zero sized tensor: {} {}", external.name(), external.shape()); + MMDEPLOY_WARN("copy skipped due to zero sized tensor: {} {}", external.name(), + external.shape()); } } } @@ -235,7 +236,7 @@ Result PPLNet::Reshape(Span input_shapes) { if (can_infer_output_shapes_) { OUTCOME_TRY(auto output_shapes, InferOutputShapes(input_shapes, prev_in_shapes, 
prev_out_shapes)); - // ERROR("inferred output shapes: {}", output_shapes); + // MMDEPLOY_ERROR("inferred output shapes: {}", output_shapes); for (int i = 0; i < outputs_external_.size(); ++i) { auto& output = outputs_external_[i]; output.Reshape(output_shapes[i]); @@ -304,7 +305,7 @@ class PPLNetCreator : public Creator { if (auto r = p->Init(args)) { return p; } else { - ERROR("error creating PPLNet: {}", r.error().message().c_str()); + MMDEPLOY_ERROR("error creating PPLNet: {}", r.error().message().c_str()); return nullptr; } } diff --git a/csrc/net/trt/CMakeLists.txt b/csrc/net/trt/CMakeLists.txt index 1368e93352..94f08070b0 100644 --- a/csrc/net/trt/CMakeLists.txt +++ b/csrc/net/trt/CMakeLists.txt @@ -2,24 +2,16 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_trt_net) -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) include(${CMAKE_SOURCE_DIR}/cmake/tensorrt.cmake) -add_library(${PROJECT_NAME} SHARED trt_net.cpp) +mmdeploy_add_module(${PROJECT_NAME} trt_net.cpp) target_include_directories(${PROJECT_NAME} PRIVATE ${TENSORRT_INCLUDE_DIR}) target_include_directories(${PROJECT_NAME} PRIVATE ${CUDNN_DIR}/include) target_include_directories(${PROJECT_NAME} PRIVATE ${CUDA_TOOLKIT_ROOT_DIR}/include) -target_link_directories(${PROJECT_NAME} PUBLIC ${CUDNN_DIR}/lib64) +target_link_directories(${PROJECT_NAME} PUBLIC ${CUDNN_DIR}/lib64 ${CUDNN_DIR}/lib/x64) +target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy_tensorrt_ops_obj) target_link_libraries(${PROJECT_NAME} PUBLIC ${TENSORRT_LIBRARY} cudnn) -target_link_libraries(${PROJECT_NAME} - PRIVATE mmdeploy::core - ) -target_link_libraries(${PROJECT_NAME} - PRIVATE -Wl,--whole-archive - mmdeploy::tensorrt_ops::static - -Wl,--no-whole-archive - ) add_library(mmdeploy::trt_net ALIAS ${PROJECT_NAME}) -export_module(${PROJECT_NAME}) diff --git a/csrc/net/trt/trt_net.cpp b/csrc/net/trt/trt_net.cpp index 6f4cb940a1..9300aad10e 100644 --- a/csrc/net/trt/trt_net.cpp +++ b/csrc/net/trt/trt_net.cpp @@ -18,14 +18,14 @@ class TRTLogger : public nvinfer1::ILogger { void log(Severity severity, const char* msg) noexcept override { switch (severity) { case Severity::kINFO: - // INFO("TRTNet: {}", msg); + // MMDEPLOY_INFO("TRTNet: {}", msg); break; case Severity::kWARNING: - WARN("TRTNet: {}", msg); + MMDEPLOY_WARN("TRTNet: {}", msg); break; case Severity::kERROR: case Severity::kINTERNAL_ERROR: - ERROR("TRTNet: {}", msg); + MMDEPLOY_ERROR("TRTNet: {}", msg); break; default: break; @@ -72,7 +72,7 @@ static inline Result trt_try(bool code, const char* msg = nullptr, Status return success(); } if (msg) { - ERROR("{}", msg); + MMDEPLOY_ERROR("{}", msg); } return e; } @@ -102,7 +102,7 @@ Result TRTNet::Init(const Value& args) { auto& context = args["context"]; device_ = context["device"].get(); if (device_.is_host()) { - ERROR("TRTNet: device must be a GPU!"); + MMDEPLOY_ERROR("TRTNet: device must be a GPU!"); return Status(eNotSupported); } stream_ = context["stream"].get(); @@ -129,19 +129,18 @@ Result TRTNet::Init(const Value& args) { auto binding_name = engine_->getBindingName(i); auto dims = engine_->getBindingDimensions(i); if (engine_->isShapeBinding(i)) { - ERROR("shape binding is not supported."); + MMDEPLOY_ERROR("shape binding is not supported."); return Status(eNotSupported); } OUTCOME_TRY(auto dtype, MapDataType(engine_->getBindingDataType(i))); - TensorDesc desc{ - .device = device_, .data_type = dtype, .shape = to_shape(dims), .name = binding_name}; + TensorDesc desc{device_, dtype, 
to_shape(dims), binding_name}; if (engine_->bindingIsInput(i)) { - DEBUG("input binding {} {} {}", i, binding_name, to_string(dims)); + MMDEPLOY_DEBUG("input binding {} {} {}", i, binding_name, to_string(dims)); input_ids_.push_back(i); input_names_.emplace_back(binding_name); input_tensors_.emplace_back(desc, Buffer()); } else { - DEBUG("output binding {} {} {}", i, binding_name, to_string(dims)); + MMDEPLOY_DEBUG("output binding {} {} {}", i, binding_name, to_string(dims)); output_ids_.push_back(i); output_names_.emplace_back(binding_name); output_tensors_.emplace_back(desc, Buffer()); @@ -169,17 +168,17 @@ Result TRTNet::Reshape(Span input_shapes) { } for (int i = 0; i < input_tensors_.size(); ++i) { auto dims = to_dims(input_shapes[i]); - // ERROR("input shape: {}", to_string(dims)); + // MMDEPLOY_ERROR("input shape: {}", to_string(dims)); TRT_TRY(context_->setBindingDimensions(input_ids_[i], dims)); input_tensors_[i].Reshape(input_shapes[i]); } if (!context_->allInputDimensionsSpecified()) { - ERROR("not all input dimensions specified"); + MMDEPLOY_ERROR("not all input dimensions specified"); return Status(eFail); } for (int i = 0; i < output_tensors_.size(); ++i) { auto dims = context_->getBindingDimensions(output_ids_[i]); - // ERROR("output shape: {}", to_string(dims)); + // MMDEPLOY_ERROR("output shape: {}", to_string(dims)); output_tensors_[i].Reshape(to_shape(dims)); } return success(); diff --git a/csrc/preprocess/CMakeLists.txt b/csrc/preprocess/CMakeLists.txt index ab1084198e..503ead8f8d 100644 --- a/csrc/preprocess/CMakeLists.txt +++ b/csrc/preprocess/CMakeLists.txt @@ -8,8 +8,7 @@ if ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES) add_subdirectory(cuda) endif () -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) -build_target(${PROJECT_NAME} transform_module.cpp) -target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy::core) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) +mmdeploy_add_module(${PROJECT_NAME} transform_module.cpp) +target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy::transform) add_library(mmdeploy::transform_module ALIAS ${PROJECT_NAME}) -export_module(${PROJECT_NAME}) diff --git a/csrc/preprocess/cpu/CMakeLists.txt b/csrc/preprocess/cpu/CMakeLists.txt index 00c87cd7ee..d2a75b10e8 100644 --- a/csrc/preprocess/cpu/CMakeLists.txt +++ b/csrc/preprocess/cpu/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_cpu_transform_impl) include(${CMAKE_SOURCE_DIR}/cmake/opencv.cmake) -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) set(SRCS collect_impl.cpp @@ -11,13 +11,10 @@ set(SRCS image2tensor_impl.cpp load_impl.cpp normalize_impl.cpp - opencv_utils.cpp - opencv_utils.h pad_impl.cpp resize_impl.cpp) -build_target(${PROJECT_NAME} "${SRCS}") +mmdeploy_add_module(${PROJECT_NAME} "${SRCS}") target_link_libraries(${PROJECT_NAME} - PUBLIC opencv_imgproc opencv_core - PRIVATE mmdeploy::core) + PRIVATE mmdeploy::transform + mmdeploy_opencv_utils) add_library(mmdeploy::transform_impl::cpu ALIAS ${PROJECT_NAME}) -export_module(${PROJECT_NAME}) diff --git a/csrc/preprocess/cpu/pad_impl.cpp b/csrc/preprocess/cpu/pad_impl.cpp index 3a139d70ad..c75ba4139b 100644 --- a/csrc/preprocess/cpu/pad_impl.cpp +++ b/csrc/preprocess/cpu/pad_impl.cpp @@ -17,7 +17,7 @@ class PadImpl : public ::mmdeploy::PadImpl { {"reflect", cv::BORDER_REFLECT_101}, {"symmetric", cv::BORDER_REFLECT}}; if (border_map.find(arg_.padding_mode) == border_map.end()) { - ERROR("unsupported padding_mode '{}'", arg_.padding_mode); + 
MMDEPLOY_ERROR("unsupported padding_mode '{}'", arg_.padding_mode); throw std::invalid_argument("unsupported padding_mode"); } border_type_ = border_map[arg_.padding_mode]; diff --git a/csrc/preprocess/cuda/CMakeLists.txt b/csrc/preprocess/cuda/CMakeLists.txt index ac4def77d3..76caeb214b 100644 --- a/csrc/preprocess/cuda/CMakeLists.txt +++ b/csrc/preprocess/cuda/CMakeLists.txt @@ -2,29 +2,29 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_cuda_transform_impl CUDA CXX) -if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.18.0") - # suppress 'CMAKE_CUDA_ARCHITECTURES' warning - cmake_policy(SET CMP0104 OLD) -endif() +if (${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.18.0") + # suppress 'CMAKE_CUDA_ARCHITECTURES' warning + cmake_policy(SET CMP0104 OLD) +endif () find_package(pplcv REQUIRED) -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) set(SRCS - crop_impl.cpp - image2tensor_impl.cpp - load_impl.cpp - normalize_impl.cpp - pad_impl.cpp - resize_impl.cpp - cast.cu - crop.cu - normalize.cu - transpose.cu) -build_target(${PROJECT_NAME} "${SRCS}") + crop_impl.cpp + image2tensor_impl.cpp + load_impl.cpp + normalize_impl.cpp + pad_impl.cpp + resize_impl.cpp + cast.cu + crop.cu + normalize.cu + transpose.cu) +mmdeploy_add_module(${PROJECT_NAME} "${SRCS}") +target_link_libraries(${PROJECT_NAME} PRIVATE + mmdeploy::transform ${PPLCV_LIBRARIES}) target_include_directories(${PROJECT_NAME} - PUBLIC ${CUDA_TOOLKIT_ROOT_DIR}/include) -target_link_libraries(${PROJECT_NAME} PRIVATE ${PPLCV_LIBRARIES} mmdeploy::core) + PUBLIC ${CUDA_TOOLKIT_ROOT_DIR}/include ${PPLCV_INCLUDE_DIRS}) add_library(mmdeploy::transform_impl::cuda ALIAS ${PROJECT_NAME}) -export_module(${PROJECT_NAME}) diff --git a/csrc/preprocess/cuda/crop_impl.cpp b/csrc/preprocess/cuda/crop_impl.cpp index 0808e8c733..eb6f64f835 100644 --- a/csrc/preprocess/cuda/crop_impl.cpp +++ b/csrc/preprocess/cuda/crop_impl.cpp @@ -43,7 +43,7 @@ class CenterCropImpl : public ::mmdeploy::CenterCropImpl { } else if (1 == c) { Crop(input, desc.shape[2], output, h, w, top, left, stream); } else { - ERROR("unsupported channels {}", c); + MMDEPLOY_ERROR("unsupported channels {}", c); return Status(eNotSupported); } } else if (DataType::kFLOAT == type) { @@ -54,11 +54,11 @@ class CenterCropImpl : public ::mmdeploy::CenterCropImpl { } else if (1 == c) { Crop(input, desc.shape[2], output, h, w, top, left, stream); } else { - ERROR("unsupported channels {}", c); + MMDEPLOY_ERROR("unsupported channels {}", c); return Status(eNotSupported); } } else { - ERROR("unsupported channels {}", c); + MMDEPLOY_ERROR("unsupported channels {}", c); return Status(eNotSupported); } return dst_tensor; diff --git a/csrc/preprocess/cuda/load_impl.cpp b/csrc/preprocess/cuda/load_impl.cpp index 2d8df26b87..e7ffe506d2 100644 --- a/csrc/preprocess/cuda/load_impl.cpp +++ b/csrc/preprocess/cuda/load_impl.cpp @@ -70,11 +70,11 @@ class PrepareImageImpl : public ::mmdeploy::PrepareImageImpl { BGRA2BGR(stream, src_h, src_w, src_stride, src_ptr, dst_stride, dst_ptr); break; default: - ERROR("src type: unknown type {}", img.pixel_format()); + MMDEPLOY_ERROR("src type: unknown type {}", img.pixel_format()); return Status(eNotSupported); } if (ret != 0) { - ERROR("color transfer from {} to BGR failed, ret {}", img.pixel_format(), ret); + MMDEPLOY_ERROR("color transfer from {} to BGR failed, ret {}", img.pixel_format(), ret); return Status(eFail); } if (arg_.to_float32) { @@ -140,11 +140,11 @@ class PrepareImageImpl : public ::mmdeploy::PrepareImageImpl 
{ BGRA2GRAY(stream, src_h, src_w, src_stride, src_ptr, dst_stride, dst_ptr); break; default: - ERROR("src type: unknown type {}", img.pixel_format()); + MMDEPLOY_ERROR("src type: unknown type {}", img.pixel_format()); throw Status(eNotSupported); } if (ret != 0) { - ERROR("color transfer from {} to Gray failed", img.pixel_format()); + MMDEPLOY_ERROR("color transfer from {} to Gray failed", img.pixel_format()); throw Status(eFail); } if (arg_.to_float32) { diff --git a/csrc/preprocess/cuda/normalize.cu b/csrc/preprocess/cuda/normalize.cu index 696abcc7d3..9536ecd054 100644 --- a/csrc/preprocess/cuda/normalize.cu +++ b/csrc/preprocess/cuda/normalize.cu @@ -1,5 +1,7 @@ // Copyright (c) OpenMMLab. All rights reserved. +#include + #include #include @@ -12,7 +14,7 @@ __global__ void normalize(const T* src, int height, int width, int stride, float int x = (int)(blockIdx.x * blockDim.x + threadIdx.x); int y = (int)(blockIdx.y * blockDim.y + threadIdx.y); - if (x >= width or y >= height) { + if (x >= width || y >= height) { return; } diff --git a/csrc/preprocess/cuda/normalize_impl.cpp b/csrc/preprocess/cuda/normalize_impl.cpp index 639f31aa98..48e6647990 100644 --- a/csrc/preprocess/cuda/normalize_impl.cpp +++ b/csrc/preprocess/cuda/normalize_impl.cpp @@ -41,7 +41,7 @@ class NormalizeImpl : public ::mmdeploy::NormalizeImpl { Normalize(input, h, w, stride, output, arg_.mean.data(), arg_.std.data(), arg_.to_rgb, stream); } else { - ERROR("unsupported channels {}", c); + MMDEPLOY_ERROR("unsupported channels {}", c); return Status(eNotSupported); } } else if (DataType::kFLOAT == src_desc.data_type) { @@ -53,11 +53,11 @@ class NormalizeImpl : public ::mmdeploy::NormalizeImpl { Normalize(input, h, w, stride, output, arg_.mean.data(), arg_.std.data(), arg_.to_rgb, stream); } else { - ERROR("unsupported channels {}", c); + MMDEPLOY_ERROR("unsupported channels {}", c); return Status(eNotSupported); } } else { - ERROR("unsupported data type {}", src_desc.data_type); + MMDEPLOY_ERROR("unsupported data type {}", src_desc.data_type); assert(0); return Status(eNotSupported); } diff --git a/csrc/preprocess/cuda/pad_impl.cpp b/csrc/preprocess/cuda/pad_impl.cpp index ae567cb092..77781c0485 100644 --- a/csrc/preprocess/cuda/pad_impl.cpp +++ b/csrc/preprocess/cuda/pad_impl.cpp @@ -14,12 +14,20 @@ namespace cuda { class PadImpl : public ::mmdeploy::PadImpl { public: explicit PadImpl(const Value& args) : ::mmdeploy::PadImpl(args) { +#if PPLCV_VERSION_MAJOR >= 0 && PPLCV_VERSION_MINOR >= 6 && PPLCV_VERSION_PATCH >= 2 + map border_map{{"constant", ppl::cv::BORDER_CONSTANT}, + {"edge", ppl::cv::BORDER_REPLICATE}, + {"reflect", ppl::cv::BORDER_REFLECT_101}, + { "symmetric", + ppl::cv::BORDER_REFLECT }}; +#else map border_map{{"constant", ppl::cv::BORDER_TYPE_CONSTANT}, {"edge", ppl::cv::BORDER_TYPE_REPLICATE}, {"reflect", ppl::cv::BORDER_TYPE_REFLECT_101}, {"symmetric", ppl::cv::BORDER_TYPE_REFLECT}}; +#endif if (border_map.find(arg_.padding_mode) == border_map.end()) { - ERROR("unsupported padding_mode '{}'", arg_.padding_mode); + MMDEPLOY_ERROR("unsupported padding_mode '{}'", arg_.padding_mode); throw_exception(eNotSupported); } padding_mode_ = border_map[arg_.padding_mode]; @@ -55,7 +63,7 @@ class PadImpl : public ::mmdeploy::PadImpl { dst_buffer, padding[1], padding[3], padding[0], padding[2], padding_mode_, arg_.pad_val); } else { - ERROR("unsupported channels {}", c); + MMDEPLOY_ERROR("unsupported channels {}", c); assert(0); return Status(eNotSupported); } @@ -71,17 +79,17 @@ class PadImpl : public 
::mmdeploy::PadImpl { stream, height, width, width * c, src_buffer, dst_width * c, dst_buffer, padding[1], padding[3], padding[0], padding[2], padding_mode_, (ppl::cv::uchar)arg_.pad_val); } else { - ERROR("unsupported channels {}", c); + MMDEPLOY_ERROR("unsupported channels {}", c); assert(0); return Status(eNotSupported); } } else { - ERROR("unsupported data type {}", desc.data_type); + MMDEPLOY_ERROR("unsupported data type {}", desc.data_type); assert(0); return Status(eNotSupported); } if (ret != 0) { - ERROR("unexpected exception happened"); + MMDEPLOY_ERROR("unexpected exception happened"); assert(0); return Status(eNotSupported); } diff --git a/csrc/preprocess/cuda/resize_impl.cpp b/csrc/preprocess/cuda/resize_impl.cpp index ce0a891c3c..8a37664801 100644 --- a/csrc/preprocess/cuda/resize_impl.cpp +++ b/csrc/preprocess/cuda/resize_impl.cpp @@ -14,7 +14,7 @@ class ResizeImpl final : public ::mmdeploy::ResizeImpl { public: explicit ResizeImpl(const Value& args) : ::mmdeploy::ResizeImpl(args) { if (arg_.interpolation != "bilinear" && arg_.interpolation != "nearest") { - ERROR("{} interpolation is not supported", arg_.interpolation); + MMDEPLOY_ERROR("{} interpolation is not supported", arg_.interpolation); throw_exception(eNotSupported); } } @@ -33,7 +33,7 @@ class ResizeImpl final : public ::mmdeploy::ResizeImpl { } else if (tensor.data_type() == DataType::kFLOAT) { OUTCOME_TRY(ResizeDispatch(src_tensor, dst_tensor, stream)); } else { - ERROR("unsupported data type {}", tensor.data_type()); + MMDEPLOY_ERROR("unsupported data type {}", tensor.data_type()); return Status(eNotSupported); } return dst_tensor; @@ -42,23 +42,23 @@ class ResizeImpl final : public ::mmdeploy::ResizeImpl { private: template ppl::common::RetCode DispatchImpl(Args&&... args) { -#ifdef PPLCV_VERSION_MAJOR +#if PPLCV_VERSION_MAJOR >= 0 && PPLCV_VERSION_MINOR >= 6 && PPLCV_VERSION_PATCH >= 2 if (arg_.interpolation == "bilinear") { return ppl::cv::cuda::Resize(std::forward(args)..., - ppl::cv::INTERPOLATION_TYPE_LINEAR); + ppl::cv::INTERPOLATION_LINEAR); } if (arg_.interpolation == "nearest") { return ppl::cv::cuda::Resize(std::forward(args)..., - ppl::cv::INTERPOLATION_TYPE_NEAREST_POINT); + ppl::cv::INTERPOLATION_NEAREST_POINT); } - #else -#warning "support for ppl.cv < 0.6 is deprecated and will be dropped in the future" if (arg_.interpolation == "bilinear") { - return ppl::cv::cuda::ResizeLinear(std::forward(args)...); + return ppl::cv::cuda::Resize(std::forward(args)..., + ppl::cv::INTERPOLATION_TYPE_LINEAR); } if (arg_.interpolation == "nearest") { - return ppl::cv::cuda::ResizeNearestPoint(std::forward(args)...); + return ppl::cv::cuda::Resize(std::forward(args)..., + ppl::cv::INTERPOLATION_TYPE_NEAREST_POINT); } #endif return ppl::common::RC_UNSUPPORTED; @@ -82,7 +82,7 @@ class ResizeImpl final : public ::mmdeploy::ResizeImpl { } else if (4 == c) { ret = DispatchImpl(stream, h, w, w * c, input, dst_h, dst_w, dst_w * c, output); } else { - ERROR("unsupported channels {}", c); + MMDEPLOY_ERROR("unsupported channels {}", c); return Status(eNotSupported); } return ret == 0 ? 
success() : Result(Status(eFail)); diff --git a/csrc/preprocess/transform/CMakeLists.txt b/csrc/preprocess/transform/CMakeLists.txt index e9a9c14026..8e13a67ae1 100644 --- a/csrc/preprocess/transform/CMakeLists.txt +++ b/csrc/preprocess/transform/CMakeLists.txt @@ -2,21 +2,19 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy_transform) -include(${CMAKE_SOURCE_DIR}/cmake/common.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) set(SRCS - collect.cpp - compose.cpp - crop.cpp - image2tensor.cpp - load.cpp - normalize.cpp - pad.cpp - resize.cpp - transform.cpp) -build_target(${PROJECT_NAME} "${SRCS}") + collect.cpp + compose.cpp + crop.cpp + image2tensor.cpp + load.cpp + normalize.cpp + pad.cpp + resize.cpp + transform.cpp) +mmdeploy_add_module(${PROJECT_NAME} LIBRARY "${SRCS}") target_include_directories( - ${PROJECT_NAME} PUBLIC $) -target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy::core) + ${PROJECT_NAME} PUBLIC $) add_library(mmdeploy::transform ALIAS ${PROJECT_NAME}) -export_module(${PROJECT_NAME}) diff --git a/csrc/preprocess/transform/collect.cpp b/csrc/preprocess/transform/collect.cpp index 673514c780..d01d1cf4b3 100644 --- a/csrc/preprocess/transform/collect.cpp +++ b/csrc/preprocess/transform/collect.cpp @@ -26,7 +26,7 @@ CollectImpl::CollectImpl(const Value &args) { } Result CollectImpl::Process(const Value &input) { - DEBUG("input: {}", to_json(input).dump(2)); + MMDEPLOY_DEBUG("input: {}", to_json(input).dump(2)); Value output; // collect 'ori_img' and 'attribute' from `input`, because those two fields @@ -45,7 +45,7 @@ Result CollectImpl::Process(const Value &input) { } for (auto &key : arg_.keys) { if (!input.contains(key)) { - ERROR("missed key '{}' in input", key); + MMDEPLOY_ERROR("missing key '{}' in input", key); // return Status(eInvalidArgument); return Status(eInvalidArgument); } else { @@ -53,7 +53,7 @@ Result CollectImpl::Process(const Value &input) { } } - DEBUG("output: {}", to_json(output).dump(2)); + MMDEPLOY_DEBUG("output: {}", to_json(output).dump(2)); return output; } @@ -77,4 +77,6 @@ class CollectCreator : public Creator { REGISTER_MODULE(Transform, CollectCreator); +MMDEPLOY_DEFINE_REGISTRY(CollectImpl); + } // namespace mmdeploy diff --git a/csrc/preprocess/transform/collect.h b/csrc/preprocess/transform/collect.h index 92439120f1..327c5191e4 100644 --- a/csrc/preprocess/transform/collect.h +++ b/csrc/preprocess/transform/collect.h @@ -6,7 +6,7 @@ #include "transform.h" namespace mmdeploy { -class CollectImpl : public Module { +class MMDEPLOY_API CollectImpl : public Module { public: explicit CollectImpl(const Value& args); ~CollectImpl() = default; @@ -24,7 +24,7 @@ class CollectImpl : public Module { ArgType arg_; }; -class Collect : public Transform { +class MMDEPLOY_API Collect : public Transform { public: explicit Collect(const Value& args, int version = 0); ~Collect() = default; @@ -35,6 +35,8 @@ class Collect : public Transform { std::unique_ptr impl_; }; +MMDEPLOY_DECLARE_REGISTRY(CollectImpl); + } // namespace mmdeploy #endif // MMDEPLOY_COLLECT_H diff --git a/csrc/preprocess/transform/compose.cpp b/csrc/preprocess/transform/compose.cpp index 57417eb480..a52b6848a1 100644 --- a/csrc/preprocess/transform/compose.cpp +++ b/csrc/preprocess/transform/compose.cpp @@ -17,15 +17,15 @@ Compose::Compose(const Value& args, int version) : Transform(args) { for (auto cfg : args["transforms"]) { cfg["context"] = context; auto type = cfg.value("type", std::string{}); - DEBUG("creating transform: {} with cfg: {}", type,
mmdeploy::to_json(cfg).dump(2)); + MMDEPLOY_DEBUG("creating transform: {} with cfg: {}", type, mmdeploy::to_json(cfg).dump(2)); auto creator = Registry::Get().GetCreator(type, version); if (!creator) { - ERROR("unable to find creator: {}", type); + MMDEPLOY_ERROR("unable to find creator: {}", type); throw std::invalid_argument("unable to find creator"); } auto transform = creator->Create(cfg); if (!transform) { - ERROR("failed to create transform: {}", type); + MMDEPLOY_ERROR("failed to create transform: {}", type); throw std::invalid_argument("failed to create transform"); } transforms_.push_back(std::move(transform)); diff --git a/csrc/preprocess/transform/compose.h b/csrc/preprocess/transform/compose.h index 3472d3e209..41f170371c 100644 --- a/csrc/preprocess/transform/compose.h +++ b/csrc/preprocess/transform/compose.h @@ -7,7 +7,7 @@ namespace mmdeploy { -class Compose : public Transform { +class MMDEPLOY_API Compose : public Transform { public: explicit Compose(const Value& args, int version = 0); ~Compose() override = default; diff --git a/csrc/preprocess/transform/crop.cpp b/csrc/preprocess/transform/crop.cpp index d2b9977dc4..1ea8867cab 100644 --- a/csrc/preprocess/transform/crop.cpp +++ b/csrc/preprocess/transform/crop.cpp @@ -24,7 +24,7 @@ CenterCropImpl::CenterCropImpl(const Value& args) : TransformImpl(args) { } Result CenterCropImpl::Process(const Value& input) { - DEBUG("input: {}", to_json(input).dump(2)); + MMDEPLOY_DEBUG("input: {}", to_json(input).dump(2)); auto img_fields = GetImageFields(input); // copy input data, and update its properties @@ -63,14 +63,14 @@ Result CenterCropImpl::Process(const Value& input) { } } - DEBUG("output: {}", to_json(output).dump(2)); + MMDEPLOY_DEBUG("output: {}", to_json(output).dump(2)); return output; } CenterCrop::CenterCrop(const Value& args, int version) : Transform(args) { auto impl_creator = Registry::Get().GetCreator(specified_platform_, version); if (nullptr == impl_creator) { - ERROR("'CenterCrop' is not supported on '{}' platform", specified_platform_); + MMDEPLOY_ERROR("'CenterCrop' is not supported on '{}' platform", specified_platform_); throw std::domain_error("'Resize' is not supported on specified platform"); } impl_ = impl_creator->Create(args); @@ -87,4 +87,5 @@ class CenterCropCreator : public Creator { }; REGISTER_MODULE(Transform, CenterCropCreator); +MMDEPLOY_DEFINE_REGISTRY(CenterCropImpl); } // namespace mmdeploy diff --git a/csrc/preprocess/transform/crop.h b/csrc/preprocess/transform/crop.h index 46bd50737b..76c567271e 100644 --- a/csrc/preprocess/transform/crop.h +++ b/csrc/preprocess/transform/crop.h @@ -3,12 +3,14 @@ #ifndef MMDEPLOY_CROP_H #define MMDEPLOY_CROP_H +#include + #include "core/tensor.h" #include "transform.h" namespace mmdeploy { -class CenterCropImpl : public TransformImpl { +class MMDEPLOY_API CenterCropImpl : public TransformImpl { public: explicit CenterCropImpl(const Value& args); ~CenterCropImpl() = default; @@ -29,7 +31,7 @@ class CenterCropImpl : public TransformImpl { ArgType arg_; }; -class CenterCrop : public Transform { +class MMDEPLOY_API CenterCrop : public Transform { public: explicit CenterCrop(const Value& args, int version = 0); ~CenterCrop() = default; @@ -40,6 +42,8 @@ class CenterCrop : public Transform { std::unique_ptr impl_; }; +MMDEPLOY_DECLARE_REGISTRY(CenterCropImpl); + } // namespace mmdeploy #endif // MMDEPLOY_CROP_H diff --git a/csrc/preprocess/transform/image2tensor.cpp b/csrc/preprocess/transform/image2tensor.cpp index 2adf959904..e2ccd3bb5d 100644 --- 
a/csrc/preprocess/transform/image2tensor.cpp +++ b/csrc/preprocess/transform/image2tensor.cpp @@ -16,7 +16,7 @@ ImageToTensorImpl::ImageToTensorImpl(const Value& args) : TransformImpl(args) { } Result ImageToTensorImpl::Process(const Value& input) { - DEBUG("input: {}", to_json(input).dump(2)); + MMDEPLOY_DEBUG("input: {}", to_json(input).dump(2)); Value output = input; for (auto& key : arg_.keys) { assert(input.contains(key)); @@ -28,14 +28,14 @@ Result ImageToTensorImpl::Process(const Value& input) { OUTCOME_TRY(output[key], HWC2CHW(src_tensor)); } // for key - DEBUG("output: {}", to_json(output).dump(2)); + MMDEPLOY_DEBUG("output: {}", to_json(output).dump(2)); return output; } ImageToTensor::ImageToTensor(const Value& args, int version) : Transform(args) { auto impl_creator = Registry::Get().GetCreator(specified_platform_, version); if (nullptr == impl_creator) { - ERROR("'ImageToTensor' is not supported on '{}' platform", specified_platform_); + MMDEPLOY_ERROR("'ImageToTensor' is not supported on '{}' platform", specified_platform_); throw std::domain_error("'ImageToTensor' is not supported on specified platform"); } impl_ = impl_creator->Create(args); @@ -53,4 +53,5 @@ class ImageToTensorCreator : public Creator { int version_{1}; }; REGISTER_MODULE(Transform, ImageToTensorCreator); +MMDEPLOY_DEFINE_REGISTRY(ImageToTensorImpl); } // namespace mmdeploy diff --git a/csrc/preprocess/transform/image2tensor.h b/csrc/preprocess/transform/image2tensor.h index cca2c5db7a..49eefd9f47 100644 --- a/csrc/preprocess/transform/image2tensor.h +++ b/csrc/preprocess/transform/image2tensor.h @@ -14,7 +14,7 @@ namespace mmdeploy { * it to (1, C, H, W). * */ -class ImageToTensorImpl : public TransformImpl { +class MMDEPLOY_API ImageToTensorImpl : public TransformImpl { public: ImageToTensorImpl(const Value& args); ~ImageToTensorImpl() = default; @@ -34,7 +34,7 @@ class ImageToTensorImpl : public TransformImpl { ArgType arg_; }; -class ImageToTensor : public Transform { +class MMDEPLOY_API ImageToTensor : public Transform { public: explicit ImageToTensor(const Value& args, int version = 0); ~ImageToTensor() = default; @@ -45,6 +45,8 @@ class ImageToTensor : public Transform { std::unique_ptr impl_; }; +MMDEPLOY_DECLARE_REGISTRY(ImageToTensorImpl); + } // namespace mmdeploy #endif // MMDEPLOY_IMAGE2TENSOR_H diff --git a/csrc/preprocess/transform/load.cpp b/csrc/preprocess/transform/load.cpp index 671948f2d8..462c70a837 100644 --- a/csrc/preprocess/transform/load.cpp +++ b/csrc/preprocess/transform/load.cpp @@ -31,7 +31,7 @@ PrepareImageImpl::PrepareImageImpl(const Value& args) : TransformImpl(args) { */ Result PrepareImageImpl::Process(const Value& input) { - DEBUG("input: {}", to_json(input).dump(2)); + MMDEPLOY_DEBUG("input: {}", to_json(input).dump(2)); assert(input.contains("ori_img")); // copy input data, and update its properties later @@ -50,7 +50,7 @@ Result PrepareImageImpl::Process(const Value& input) { } output["ori_shape"] = {1, src_mat.height(), src_mat.width(), src_mat.channel()}; output["img_fields"].push_back("img"); - DEBUG("output: {}", to_json(output).dump(2)); + MMDEPLOY_DEBUG("output: {}", to_json(output).dump(2)); return output; } @@ -58,7 +58,7 @@ Result PrepareImageImpl::Process(const Value& input) { PrepareImage::PrepareImage(const Value& args, int version) : Transform(args) { auto impl_creator = Registry::Get().GetCreator(specified_platform_, version); if (nullptr == impl_creator) { - ERROR("'PrepareImage' is not supported on '{}' platform", specified_platform_); + 
MMDEPLOY_ERROR("'PrepareImage' is not supported on '{}' platform", specified_platform_); throw std::domain_error("'PrepareImage' is not supported on specified platform"); } impl_ = impl_creator->Create(args); @@ -80,4 +80,7 @@ class PrepareImageCreator : public Creator { }; REGISTER_MODULE(Transform, PrepareImageCreator); + +MMDEPLOY_DEFINE_REGISTRY(PrepareImageImpl); + } // namespace mmdeploy diff --git a/csrc/preprocess/transform/load.h b/csrc/preprocess/transform/load.h index 32f0bdfdbd..a05d4c136f 100644 --- a/csrc/preprocess/transform/load.h +++ b/csrc/preprocess/transform/load.h @@ -8,7 +8,7 @@ #include "transform.h" namespace mmdeploy { -class PrepareImageImpl : public TransformImpl { +class MMDEPLOY_API PrepareImageImpl : public TransformImpl { public: explicit PrepareImageImpl(const Value& args); ~PrepareImageImpl() = default; @@ -29,7 +29,7 @@ class PrepareImageImpl : public TransformImpl { ArgType arg_; }; -class PrepareImage : public Transform { +class MMDEPLOY_API PrepareImage : public Transform { public: explicit PrepareImage(const Value& args, int version = 0); ~PrepareImage() = default; @@ -40,6 +40,8 @@ class PrepareImage : public Transform { std::unique_ptr impl_; }; +MMDEPLOY_DECLARE_REGISTRY(PrepareImageImpl); + } // namespace mmdeploy #endif // MMDEPLOY_LOAD_H diff --git a/csrc/preprocess/transform/normalize.cpp b/csrc/preprocess/transform/normalize.cpp index a0bc5f7ba2..7fc9c2ad31 100644 --- a/csrc/preprocess/transform/normalize.cpp +++ b/csrc/preprocess/transform/normalize.cpp @@ -10,9 +10,11 @@ using namespace std; namespace mmdeploy { +// MMDEPLOY_DEFINE_REGISTRY(NormalizeImpl); + NormalizeImpl::NormalizeImpl(const Value& args) : TransformImpl(args) { - if (!args.contains("mean") or !args.contains("std")) { - ERROR("no 'mean' or 'std' is configured"); + if (!args.contains("mean") || !args.contains("std")) { + MMDEPLOY_ERROR("no 'mean' or 'std' is configured"); throw std::invalid_argument("no 'mean' or 'std' is configured"); } for (auto& v : args["mean"]) { @@ -50,7 +52,7 @@ NormalizeImpl::NormalizeImpl(const Value& args) : TransformImpl(args) { */ Result NormalizeImpl::Process(const Value& input) { - DEBUG("input: {}", to_json(input).dump(2)); + MMDEPLOY_DEBUG("input: {}", to_json(input).dump(2)); // copy input data, and update its properties later Value output = input; @@ -73,14 +75,14 @@ Result NormalizeImpl::Process(const Value& input) { } output["img_norm_cfg"]["to_rgb"] = arg_.to_rgb; } - DEBUG("output: {}", to_json(output).dump(2)); + MMDEPLOY_DEBUG("output: {}", to_json(output).dump(2)); return output; } Normalize::Normalize(const Value& args, int version) : Transform(args) { auto impl_creator = Registry::Get().GetCreator(specified_platform_, version); if (nullptr == impl_creator) { - ERROR("'Normalize' is not supported on '{}' platform", specified_platform_); + MMDEPLOY_ERROR("'Normalize' is not supported on '{}' platform", specified_platform_); throw std::domain_error("'Normalize' is not supported on specified platform"); } impl_ = impl_creator->Create(args); @@ -98,4 +100,6 @@ class NormalizeCreator : public Creator { REGISTER_MODULE(Transform, NormalizeCreator); +MMDEPLOY_DEFINE_REGISTRY(NormalizeImpl); + } // namespace mmdeploy diff --git a/csrc/preprocess/transform/normalize.h b/csrc/preprocess/transform/normalize.h index 14a4edb43e..fef8fd17c6 100644 --- a/csrc/preprocess/transform/normalize.h +++ b/csrc/preprocess/transform/normalize.h @@ -8,7 +8,7 @@ namespace mmdeploy { -class NormalizeImpl : public TransformImpl { +class MMDEPLOY_API NormalizeImpl 
: public TransformImpl { public: explicit NormalizeImpl(const Value& args); ~NormalizeImpl() = default; @@ -28,7 +28,7 @@ class NormalizeImpl : public TransformImpl { ArgType arg_; }; -class Normalize : public Transform { +class MMDEPLOY_API Normalize : public Transform { public: explicit Normalize(const Value& args, int version = 0); ~Normalize() = default; @@ -39,5 +39,7 @@ class Normalize : public Transform { std::unique_ptr impl_; }; +MMDEPLOY_DECLARE_REGISTRY(NormalizeImpl); + } // namespace mmdeploy #endif // MMDEPLOY_NORMALIZE_H diff --git a/csrc/preprocess/transform/pad.cpp b/csrc/preprocess/transform/pad.cpp index 4d9c6c69a2..9eb60748f0 100644 --- a/csrc/preprocess/transform/pad.cpp +++ b/csrc/preprocess/transform/pad.cpp @@ -28,7 +28,7 @@ PadImpl::PadImpl(const Value& args) : TransformImpl(args) { } Result PadImpl::Process(const Value& input) { - DEBUG("input: {}", to_json(input).dump(2)); + MMDEPLOY_DEBUG("input: {}", to_json(input).dump(2)); Value output = input; auto img_fields = GetImageFields(input); @@ -38,7 +38,7 @@ Result PadImpl::Process(const Value& input) { assert(tensor.desc().shape.size() == 4); assert(tensor.desc().shape[0] == 1); - assert(tensor.desc().shape[3] == 3 or tensor.desc().shape[3] == 1); + assert(tensor.desc().shape[3] == 3 || tensor.desc().shape[3] == 1); int height = tensor.desc().shape[1]; int width = tensor.desc().shape[2]; @@ -75,14 +75,14 @@ Result PadImpl::Process(const Value& input) { } } - DEBUG("output: {}", to_json(output).dump(2)); + MMDEPLOY_DEBUG("output: {}", to_json(output).dump(2)); return output; } Pad::Pad(const Value& args, int version) : Transform(args) { auto impl_creator = Registry::Get().GetCreator(specified_platform_, version); if (nullptr == impl_creator) { - ERROR("'Pad' is not supported on '{}' platform", specified_platform_); + MMDEPLOY_ERROR("'Pad' is not supported on '{}' platform", specified_platform_); throw std::domain_error("'Pad' is not supported on specified platform"); } impl_ = impl_creator->Create(args); @@ -100,4 +100,6 @@ class PadCreator : public Creator { REGISTER_MODULE(Transform, PadCreator); +MMDEPLOY_DEFINE_REGISTRY(PadImpl); + } // namespace mmdeploy diff --git a/csrc/preprocess/transform/pad.h b/csrc/preprocess/transform/pad.h index e684791a5c..1b5ccbcd5f 100644 --- a/csrc/preprocess/transform/pad.h +++ b/csrc/preprocess/transform/pad.h @@ -3,12 +3,14 @@ #ifndef MMDEPLOY_PAD_H #define MMDEPLOY_PAD_H +#include + #include "core/tensor.h" #include "transform.h" namespace mmdeploy { -class PadImpl : public TransformImpl { +class MMDEPLOY_API PadImpl : public TransformImpl { public: explicit PadImpl(const Value& args); ~PadImpl() override = default; @@ -33,7 +35,7 @@ class PadImpl : public TransformImpl { ArgType arg_; }; -class Pad : public Transform { +class MMDEPLOY_API Pad : public Transform { public: explicit Pad(const Value& args, int version = 0); ~Pad() override = default; @@ -43,6 +45,9 @@ class Pad : public Transform { protected: std::unique_ptr impl_; }; + +MMDEPLOY_DECLARE_REGISTRY(PadImpl); + } // namespace mmdeploy #endif // MMDEPLOY_PAD_H diff --git a/csrc/preprocess/transform/resize.cpp b/csrc/preprocess/transform/resize.cpp index 604103e6c6..98398e3dce 100644 --- a/csrc/preprocess/transform/resize.cpp +++ b/csrc/preprocess/transform/resize.cpp @@ -19,14 +19,14 @@ ResizeImpl::ResizeImpl(const Value& args) : TransformImpl(args) { arg_.img_scale = {size, size}; } else if (args["size"].is_array()) { if (args["size"].size() != 2) { - ERROR("'size' expects an array of size 2, but got {}", 
args["size"].size()); + MMDEPLOY_ERROR("'size' expects an array of size 2, but got {}", args["size"].size()); throw std::length_error("'size' expects an array of size 2"); } auto height = args["size"][0].get(); auto width = args["size"][1].get(); arg_.img_scale = {height, width}; } else { - ERROR("'size' is expected to be an integer or and array of size 2"); + MMDEPLOY_ERROR("'size' is expected to be an integer or and array of size 2"); throw std::domain_error("'size' is expected to be an integer or and array of size 2"); } } @@ -35,13 +35,13 @@ ResizeImpl::ResizeImpl(const Value& args) : TransformImpl(args) { vector interpolations{"nearest", "bilinear", "bicubic", "area", "lanczos"}; if (std::find(interpolations.begin(), interpolations.end(), arg_.interpolation) == interpolations.end()) { - ERROR("'{}' interpolation is not supported", arg_.interpolation); + MMDEPLOY_ERROR("'{}' interpolation is not supported", arg_.interpolation); throw std::invalid_argument("unexpected interpolation"); } } Result ResizeImpl::Process(const Value& input) { - DEBUG("input: {}", to_json(input).dump(2)); + MMDEPLOY_DEBUG("input: {}", to_json(input).dump(2)); Value output = input; auto img_fields = GetImageFields(input); @@ -66,7 +66,7 @@ Result ResizeImpl::Process(const Value& input) { dst_h = int(h * scale_factor + 0.5); dst_w = int(w * scale_factor + 0.5); } else if (!arg_.img_scale.empty()) { - DEBUG( + MMDEPLOY_WARN( "neither 'scale' or 'scale_factor' is provided in input value. " "'img_scale' will be used"); if (-1 == arg_.img_scale[1]) { @@ -82,7 +82,7 @@ Result ResizeImpl::Process(const Value& input) { dst_w = arg_.img_scale[1]; } } else { - ERROR("no resize related parameter is provided"); + MMDEPLOY_ERROR("no resize related parameter is provided"); return Status(eInvalidArgument); } if (arg_.keep_ratio) { @@ -111,14 +111,14 @@ Result ResizeImpl::Process(const Value& input) { output[key] = dst_img; } - DEBUG("output: {}", to_json(output).dump(2)); + MMDEPLOY_DEBUG("output: {}", to_json(output).dump(2)); return output; } Resize::Resize(const Value& args, int version) : Transform(args) { auto impl_creator = Registry::Get().GetCreator(specified_platform_, version); if (nullptr == impl_creator) { - ERROR("'Resize' is not supported on '{}' platform", specified_platform_); + MMDEPLOY_ERROR("'Resize' is not supported on '{}' platform", specified_platform_); throw std::domain_error("'Resize' is not supported on specified platform"); } impl_ = impl_creator->Create(args); @@ -136,4 +136,6 @@ class ResizeCreator : public Creator { REGISTER_MODULE(Transform, ResizeCreator); +MMDEPLOY_DEFINE_REGISTRY(ResizeImpl); + } // namespace mmdeploy diff --git a/csrc/preprocess/transform/resize.h b/csrc/preprocess/transform/resize.h index 1b9c10034d..54947bee48 100644 --- a/csrc/preprocess/transform/resize.h +++ b/csrc/preprocess/transform/resize.h @@ -3,11 +3,13 @@ #ifndef MMDEPLOY_RESIZE_H #define MMDEPLOY_RESIZE_H +#include + #include "core/tensor.h" #include "transform.h" namespace mmdeploy { -class ResizeImpl : public TransformImpl { +class MMDEPLOY_API ResizeImpl : public TransformImpl { public: explicit ResizeImpl(const Value& args); ~ResizeImpl() override = default; @@ -29,7 +31,7 @@ class ResizeImpl : public TransformImpl { ArgType arg_; }; -class Resize : public Transform { +class MMDEPLOY_API Resize : public Transform { public: explicit Resize(const Value& args, int version = 0); ~Resize() override = default; @@ -40,5 +42,8 @@ class Resize : public Transform { std::unique_ptr impl_; static const std::string name_; 
}; + +MMDEPLOY_DECLARE_REGISTRY(ResizeImpl); + } // namespace mmdeploy #endif // MMDEPLOY_RESIZE_H diff --git a/csrc/preprocess/transform/transform.cpp b/csrc/preprocess/transform/transform.cpp index d57cfa393d..f2be7519c4 100644 --- a/csrc/preprocess/transform/transform.cpp +++ b/csrc/preprocess/transform/transform.cpp @@ -48,4 +48,6 @@ Transform::Transform(const Value &args) { } } +MMDEPLOY_DEFINE_REGISTRY(Transform); + } // namespace mmdeploy diff --git a/csrc/preprocess/transform/transform.h b/csrc/preprocess/transform/transform.h index fa1a700ea2..ba96e91a14 100644 --- a/csrc/preprocess/transform/transform.h +++ b/csrc/preprocess/transform/transform.h @@ -9,7 +9,7 @@ namespace mmdeploy { -class TransformImpl : public Module { +class MMDEPLOY_API TransformImpl : public Module { public: TransformImpl() = default; explicit TransformImpl(const Value& args); @@ -23,41 +23,36 @@ class TransformImpl : public Module { Stream stream_; }; -class Transform : public Module { +class MMDEPLOY_API Transform : public Module { public: + ~Transform() override = default; + Transform() = default; explicit Transform(const Value& args); - ~Transform() override = default; + Transform(const Transform&) = delete; + Transform& operator=(const Transform&) = delete; const std::string& RuntimePlatform() const { return runtime_platform_; } protected: template - [[deprecated]] - /* - * We cannot LOG the error message, because WARN/INFO/ERROR causes - * redefinition when building UTs "catch2.hpp" used in UTs has the same LOG - * declaration - */ - std::unique_ptr - Instantiate(const char* transform_type, const Value& args, int version = 0) { + [[deprecated]] std::unique_ptr Instantiate(const char* transform_type, const Value& args, + int version = 0) { std::unique_ptr impl(nullptr); auto impl_creator = Registry::Get().GetCreator(specified_platform_, version); if (nullptr == impl_creator) { - // WARN("cannot find {} implementation on specific platform {} ", - // transform_type, specified_platform_); + MMDEPLOY_WARN("cannot find {} implementation on specific platform {} ", transform_type, + specified_platform_); for (auto& name : candidate_platforms_) { impl_creator = Registry::Get().GetCreator(name); if (impl_creator) { - // INFO("fallback {} implementation to platform {}", transform_type, - // name); + MMDEPLOY_INFO("fallback {} implementation to platform {}", transform_type, name); break; } } } if (nullptr == impl_creator) { - // ERROR("cannot find {} implementation on any registered platform ", - // transform_type); + MMDEPLOY_ERROR("cannot find {} implementation on any registered platform ", transform_type); return nullptr; } else { return impl_creator->Create(args); @@ -70,6 +65,8 @@ class Transform : public Module { std::vector candidate_platforms_; }; +MMDEPLOY_DECLARE_REGISTRY(Transform); + } // namespace mmdeploy #endif // MMDEPLOY_TRANSFORM_H diff --git a/csrc/preprocess/transform_module.cpp b/csrc/preprocess/transform_module.cpp index b769878212..9b7b2f01fd 100644 --- a/csrc/preprocess/transform_module.cpp +++ b/csrc/preprocess/transform_module.cpp @@ -3,6 +3,7 @@ #include "transform_module.h" #include "archive/value_archive.h" +#include "core/module.h" #include "core/utils/formatter.h" #include "experimental/module_adapter.h" #include "preprocess/transform/transform.h" @@ -15,12 +16,12 @@ TransformModule::TransformModule(const Value& args) { const auto type = "Compose"; auto creator = Registry::Get().GetCreator(type, 1); if (!creator) { - ERROR("unable to find creator: {}", type); + MMDEPLOY_ERROR("unable 
to find creator: {}", type); throw_exception(eEntryNotFound); } auto cfg = args; if (cfg.contains("device")) { - WARN("force using device: {}", cfg["device"].get()); + MMDEPLOY_WARN("force using device: {}", cfg["device"].get()); auto device = Device(cfg["device"].get()); cfg["context"]["device"] = device; cfg["context"]["stream"] = Stream::GetDefault(device); @@ -31,7 +32,7 @@ TransformModule::TransformModule(const Value& args) { Result TransformModule::operator()(const Value& input) { auto output = transform_->Process(input); if (!output) { - ERROR("error: {}", output.error().message().c_str()); + MMDEPLOY_ERROR("error: {}", output.error().message().c_str()); } auto& ret = output.value(); if (ret.is_object()) { @@ -39,13 +40,13 @@ Result TransformModule::operator()(const Value& input) { } else if (ret.is_array() && ret.size() == 1 && ret[0].is_object()) { ret = ret[0]; } else { - ERROR("unsupported return value: {}", ret); + MMDEPLOY_ERROR("unsupported return value: {}", ret); return Status(eNotSupported); } return ret; } -class TransformModuleCreator : public Creator { +class MMDEPLOY_API TransformModuleCreator : public Creator { public: const char* GetName() const override { return "Transform"; } int GetVersion() const override { return 0; } diff --git a/csrc/utils/CMakeLists.txt b/csrc/utils/CMakeLists.txt new file mode 100644 index 0000000000..1eef35940a --- /dev/null +++ b/csrc/utils/CMakeLists.txt @@ -0,0 +1,3 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +add_subdirectory(opencv) diff --git a/csrc/utils/opencv/CMakeLists.txt b/csrc/utils/opencv/CMakeLists.txt new file mode 100644 index 0000000000..6eb8bd2e91 --- /dev/null +++ b/csrc/utils/opencv/CMakeLists.txt @@ -0,0 +1,17 @@ +# Copyright (c) OpenMMLab. All rights reserved. +cmake_minimum_required(VERSION 3.14) +project(mmdeploy_opencv_utils) + +include(${CMAKE_SOURCE_DIR}/cmake/opencv.cmake) +include(${CMAKE_SOURCE_DIR}/cmake/MMDeploy.cmake) + +mmdeploy_add_library(${PROJECT_NAME} opencv_utils.cpp) + +target_link_libraries(${PROJECT_NAME} + PRIVATE mmdeploy::core + PUBLIC ${OpenCV_LIBS}) + +target_include_directories(${PROJECT_NAME} + INTERFACE $) + +#export_module(${PROJECT_NAME}) diff --git a/csrc/preprocess/cpu/opencv_utils.cpp b/csrc/utils/opencv/opencv_utils.cpp similarity index 91% rename from csrc/preprocess/cpu/opencv_utils.cpp rename to csrc/utils/opencv/opencv_utils.cpp index d02d5571cd..ef05dbf2eb 100644 --- a/csrc/preprocess/cpu/opencv_utils.cpp +++ b/csrc/utils/opencv/opencv_utils.cpp @@ -42,7 +42,7 @@ cv::Mat Mat2CVMat(const Mat& mat) { {DataType::kINT32, CV_32S}}; auto type = CV_MAKETYPE(type_mapper[mat.type()], mat.channel()); auto format = mat.pixel_format(); - if (PixelFormat::kBGR == format or PixelFormat::kRGB == format) { + if (PixelFormat::kBGR == format || PixelFormat::kRGB == format) { return cv::Mat(mat.height(), mat.width(), type, mat.data()); } else if (PixelFormat::kGRAYSCALE == format) { return cv::Mat(mat.height(), mat.width(), type, mat.data()); @@ -59,7 +59,7 @@ cv::Mat Mat2CVMat(const Mat& mat) { } else if (PixelFormat::kBGRA == format) { return cv::Mat(mat.height(), mat.width(), type, mat.data()); } else { - ERROR("unsupported mat format {}", format); + MMDEPLOY_ERROR("unsupported mat format {}", format); return {}; } } @@ -78,7 +78,7 @@ cv::Mat Tensor2CVMat(const Tensor& tensor) { return {h, w, CV_32SC(c), const_cast(tensor.data())}; } else { assert(0); - ERROR("unsupported type: {}", desc.data_type); + MMDEPLOY_ERROR("unsupported type: {}", desc.data_type); return {}; } } @@ -95,7 
+95,7 @@ Tensor CVMat2Tensor(const cv::Mat& mat) { shape = {1, mat.rows, mat.cols, mat.channels()}; data_type = DataType::kINT32; } else { - ERROR("unsupported mat dat type {}", mat.type()); + MMDEPLOY_ERROR("unsupported mat data type {}", mat.type()); assert(0); return {}; } @@ -118,7 +118,7 @@ cv::Mat Resize(const cv::Mat& src, int dst_height, int dst_width, } else if (interpolation == "lanczos") { cv::resize(src, dst, dst.size(), 0, 0, cv::INTER_LANCZOS4); } else { - ERROR("{} interpolation is not supported", interpolation); + MMDEPLOY_ERROR("{} interpolation is not supported", interpolation); assert(0); } return dst; @@ -189,7 +189,7 @@ cv::Mat ColorTransfer(const cv::Mat& src, PixelFormat src_format, PixelFormat ds cv::cvtColor(src, dst, cv::COLOR_BGRA2BGR); break; default: - ERROR("unsupported src mat's element type {}", src_format); + MMDEPLOY_ERROR("unsupported src mat's element type {}", src_format); assert(0); return {}; } @@ -214,7 +214,7 @@ cv::Mat ColorTransfer(const cv::Mat& src, PixelFormat src_format, PixelFormat ds cv::cvtColor(src, dst, cv::COLOR_BGRA2RGB); break; default: - ERROR("unsupported src mat's element type {}", src_format); + MMDEPLOY_ERROR("unsupported src mat's element type {}", src_format); assert(0); return {}; } @@ -239,12 +239,12 @@ cv::Mat ColorTransfer(const cv::Mat& src, PixelFormat src_format, PixelFormat ds cv::cvtColor(src, dst, cv::COLOR_BGRA2GRAY); break; default: - ERROR("unsupported src mat's element type {}", src_format); + MMDEPLOY_ERROR("unsupported src mat's element type {}", src_format); assert(0); return {}; } } else { - ERROR("unsupported target mat's element type {}", dst_format); + MMDEPLOY_ERROR("unsupported target mat's element type {}", dst_format); assert(0); return {}; } @@ -267,7 +267,7 @@ bool Compare(const cv::Mat& src1, const cv::Mat& src2) { cv::subtract(_src1, _src2, diff); diff = cv::abs(diff); auto sum = cv::sum(cv::sum(diff)); - DEBUG("sum: {}, average: {}", sum[0], sum[0] * 1.0 / (src1.rows * src1.cols)); + MMDEPLOY_DEBUG("sum: {}, average: {}", sum[0], sum[0] * 1.0 / (src1.rows * src1.cols)); return sum[0] / (src1.rows * src1.cols) < 0.5f; } diff --git a/csrc/preprocess/cpu/opencv_utils.h b/csrc/utils/opencv/opencv_utils.h similarity index 69% rename from csrc/preprocess/cpu/opencv_utils.h rename to csrc/utils/opencv/opencv_utils.h index 45aa360ff3..05f8405eb4 100644 --- a/csrc/preprocess/cpu/opencv_utils.h +++ b/csrc/utils/opencv/opencv_utils.h @@ -1,7 +1,7 @@ // Copyright (c) OpenMMLab. All rights reserved.
-#ifndef MMDEPLOY_OPENCV_UTILS_H -#define MMDEPLOY_OPENCV_UTILS_H +#ifndef MMDEPLOY_CSRC_UTILS_OPENCV_OPENCV_UTILS_H_ +#define MMDEPLOY_CSRC_UTILS_OPENCV_OPENCV_UTILS_H_ #include "core/mat.h" #include "core/mpl/type_traits.h" @@ -12,11 +12,11 @@ namespace mmdeploy { namespace cpu { -cv::Mat Mat2CVMat(const Mat& mat); -cv::Mat Tensor2CVMat(const Tensor& tensor); +MMDEPLOY_API cv::Mat Mat2CVMat(const Mat& mat); +MMDEPLOY_API cv::Mat Tensor2CVMat(const Tensor& tensor); -Mat CVMat2Mat(const cv::Mat& mat, PixelFormat format); -Tensor CVMat2Tensor(const cv::Mat& mat); +MMDEPLOY_API Mat CVMat2Mat(const cv::Mat& mat, PixelFormat format); +MMDEPLOY_API Tensor CVMat2Tensor(const cv::Mat& mat); /** * @brief resize an image to specified size @@ -26,7 +26,8 @@ Tensor CVMat2Tensor(const cv::Mat& mat); * @param dst_width output image's width * @return output image if success, error code otherwise */ -cv::Mat Resize(const cv::Mat& src, int dst_height, int dst_width, const std::string& interpolation); +MMDEPLOY_API cv::Mat Resize(const cv::Mat& src, int dst_height, int dst_width, + const std::string& interpolation); /** * @brief crop an image @@ -38,7 +39,7 @@ cv::Mat Resize(const cv::Mat& src, int dst_height, int dst_width, const std::str * @param right * @return cv::Mat */ -cv::Mat Crop(const cv::Mat& src, int top, int left, int bottom, int right); +MMDEPLOY_API cv::Mat Crop(const cv::Mat& src, int top, int left, int bottom, int right); /** * @brief Do normalization to an image @@ -50,8 +51,8 @@ cv::Mat Crop(const cv::Mat& src, int top, int left, int bottom, int right); * @param inplace * @return cv::Mat */ -cv::Mat Normalize(cv::Mat& src, const std::vector& mean, const std::vector& std, - bool to_rgb, bool inplace = true); +MMDEPLOY_API cv::Mat Normalize(cv::Mat& src, const std::vector& mean, + const std::vector& std, bool to_rgb, bool inplace = true); /** * @brief tranpose an image, from {h, w, c} to {c, h, w} @@ -59,7 +60,7 @@ cv::Mat Normalize(cv::Mat& src, const std::vector& mean, const std::vecto * @param src input image * @return */ -cv::Mat Transpose(const cv::Mat& src); +MMDEPLOY_API cv::Mat Transpose(const cv::Mat& src); /** * @brief convert an image to another color space @@ -69,7 +70,8 @@ cv::Mat Transpose(const cv::Mat& src); * @param dst_format * @return */ -cv::Mat ColorTransfer(const cv::Mat& src, PixelFormat src_format, PixelFormat dst_format); +MMDEPLOY_API cv::Mat ColorTransfer(const cv::Mat& src, PixelFormat src_format, + PixelFormat dst_format); /** * @@ -82,8 +84,8 @@ cv::Mat ColorTransfer(const cv::Mat& src, PixelFormat src_format, PixelFormat ds * @param val * @return */ -cv::Mat Pad(const cv::Mat& src, int top, int left, int bottom, int right, int border_type, - float val); +MMDEPLOY_API cv::Mat Pad(const cv::Mat& src, int top, int left, int bottom, int right, + int border_type, float val); /** * @brief compare two images @@ -92,7 +94,7 @@ cv::Mat Pad(const cv::Mat& src, int top, int left, int bottom, int right, int bo * @param src2 the other input image * @return bool true means the images are the same */ -bool Compare(const cv::Mat& src1, const cv::Mat& src2); +MMDEPLOY_API bool Compare(const cv::Mat& src1, const cv::Mat& src2); } // namespace cpu @@ -104,8 +106,6 @@ struct IsCvPoint : std::false_type {}; template struct IsCvPoint<::cv::Point_> : std::true_type {}; -} // namespace detail - template >::value, int> = 0> void serialize(Archive&& archive, T&& p) { @@ -146,6 +146,8 @@ void load(Archive& archive, std::vector& v) { } } +} // namespace detail + } // namespace mmdeploy 
-#endif // MMDEPLOY_OPENCV_UTILS_H +#endif // MMDEPLOY_CSRC_UTILS_OPENCV_OPENCV_UTILS_H_ diff --git a/demo/csrc/CMakeLists.txt b/demo/csrc/CMakeLists.txt index 3da3b04b79..3e1bdcc6fb 100644 --- a/demo/csrc/CMakeLists.txt +++ b/demo/csrc/CMakeLists.txt @@ -2,13 +2,18 @@ cmake_minimum_required(VERSION 3.14) project(mmdeploy-example) -find_package(OpenCV REQUIRED) find_package(MMDeploy REQUIRED) function(add_example name) - add_executable(${name} ${name}.cpp) - target_link_libraries(${name} ${MMDeploy_LIBS} -Wl,--disable-new-dtags - opencv_imgcodecs opencv_imgproc opencv_core) + file(GLOB _SRCS ${name}.c*) + add_executable(${name} ${_SRCS}) + if (NOT MSVC) + # disable new dtags so that executables can run even without LD_LIBRARY_PATH set + target_link_libraries(${name} PRIVATE -Wl,--disable-new-dtags) + endif () + mmdeploy_load_static(${name} MMDeployStaticModules) + mmdeploy_load_dynamic(${name} MMDeployDynamicModules) + target_link_libraries(${name} PRIVATE MMDeployLibs ${OpenCV_LIBS}) endfunction() add_example(image_classification) diff --git a/demo/csrc/image_classification.cpp b/demo/csrc/image_classification.cpp index be618659be..18d1e0793e 100644 --- a/demo/csrc/image_classification.cpp +++ b/demo/csrc/image_classification.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include "classifier.h" diff --git a/demo/csrc/image_restorer.cpp b/demo/csrc/image_restorer.cpp index 4e462ce083..3984c88a8a 100644 --- a/demo/csrc/image_restorer.cpp +++ b/demo/csrc/image_restorer.cpp @@ -1,7 +1,7 @@ // Copyright (c) OpenMMLab. All rights reserved. #include -#include +#include #include #include diff --git a/demo/csrc/image_segmentation.cpp b/demo/csrc/image_segmentation.cpp index 71b3108152..8502ecec0c 100644 --- a/demo/csrc/image_segmentation.cpp +++ b/demo/csrc/image_segmentation.cpp @@ -1,7 +1,7 @@ // Copyright (c) OpenMMLab. All rights reserved. #include -#include +#include #include #include #include @@ -13,7 +13,7 @@ using namespace std; vector gen_palette(int num_classes) { std::mt19937 gen; - std::uniform_int_distribution uniform_dist(0, 255); + std::uniform_int_distribution uniform_dist(0, 255); vector palette; palette.reserve(num_classes); diff --git a/demo/csrc/object_detection.cpp b/demo/csrc/object_detection.cpp index a57b4f41f6..1843407532 100644 --- a/demo/csrc/object_detection.cpp +++ b/demo/csrc/object_detection.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/demo/csrc/ocr.cpp b/demo/csrc/ocr.cpp index f82d1eca67..1bb8d43ef2 100644 --- a/demo/csrc/ocr.cpp +++ b/demo/csrc/ocr.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/demo/demo_rewrite.py b/demo/demo_rewrite.py index 5adc581f30..a624c26eba 100644 --- a/demo/demo_rewrite.py +++ b/demo/demo_rewrite.py @@ -1,3 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
import asyncio import os import shutil diff --git a/docs/en/backends/openvino.md b/docs/en/backends/openvino.md index d043c578ac..12a6686d36 100644 --- a/docs/en/backends/openvino.md +++ b/docs/en/backends/openvino.md @@ -27,7 +27,7 @@ sudo apt-get install libpython3.7 Example: ```bash python tools/deploy.py \ - configs/mmdet/detection/detection_openvino_dynamic.py \ + configs/mmdet/detection/detection_openvino_static-300x300.py \ /mmdetection_dir/mmdetection/configs/ssd/ssd300_coco.py \ /tmp/snapshots/ssd300_coco_20210803_015428-d231a06e.pth \ tests/data/tiger.jpeg \ diff --git a/docs/en/backends/torchscript.md b/docs/en/backends/torchscript.md new file mode 100644 index 0000000000..30449444a1 --- /dev/null +++ b/docs/en/backends/torchscript.md @@ -0,0 +1,54 @@ +## TorchScript support + +### Introduction to TorchScript + +**TorchScript** is a way to create serializable and optimizable models from PyTorch code. Any TorchScript program can be saved from a Python process and loaded in a process where there is no Python dependency. Check the [Introduction to TorchScript](https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html) for more details. + +### Build custom ops + +#### Prerequisite + +- Download libtorch from the official website [here](https://pytorch.org/get-started/locally/). + +*Please note that only the **Pre-cxx11 ABI** with **version 1.8.1+** on the Linux platform is supported for now.* + +Download links for previous versions of libtorch can be found in the [issue comment](https://github.com/pytorch/pytorch/issues/40961#issuecomment-1017317786). Taking libtorch 1.8.1+cu111 as an example, extract it, expose `Torch_DIR`, and add the lib path to `LD_LIBRARY_PATH` as below: + +```bash +wget https://download.pytorch.org/libtorch/cu111/libtorch-shared-with-deps-1.8.1%2Bcu111.zip + +unzip libtorch-shared-with-deps-1.8.1+cu111.zip +cd libtorch +export Torch_DIR=$(pwd) +export LD_LIBRARY_PATH=$Torch_DIR/lib:$LD_LIBRARY_PATH +``` + +Note: + +- If you want to save the libtorch env variables to your bashrc, you could run + + ```bash + echo '# set env for libtorch' >> ~/.bashrc + echo "export Torch_DIR=${Torch_DIR}" >> ~/.bashrc + echo 'export LD_LIBRARY_PATH=$Torch_DIR/lib:$LD_LIBRARY_PATH' >> ~/.bashrc + source ~/.bashrc + ``` + +#### Build on Linux + +```bash +cd ${MMDEPLOY_DIR} # To MMDeploy root directory +mkdir -p build && cd build +cmake -DMMDEPLOY_TARGET_BACKENDS=torchscript -DTorch_DIR=${Torch_DIR} .. +make -j$(nproc) +``` + +### How to convert a model + +- Follow the instructions in the tutorial [How to convert model](../tutorials/how_to_convert_model.md) + +### FAQs + +- Error: `projects/thirdparty/libtorch/share/cmake/Caffe2/Caffe2Config.cmake:96 (message):Your installed Caffe2 version uses cuDNN but I cannot find the cuDNN libraries. Please set the proper cuDNN prefixes and / or install cuDNN.` + + Exporting `CUDNN_ROOT=/root/path/to/cudnn` may resolve the build error.
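To make the workflow in the TorchScript document above concrete, a minimal libtorch (C++) program for loading and running a converted model might look like the sketch below. It is illustrative only and not part of this patch: the model file name `end2end.pt` and the 1x3x224x224 input shape are assumptions that depend on the actual model and deploy config.

```cpp
// Minimal sketch: load and run a TorchScript model with libtorch.
// Assumptions: the converted model was saved as "end2end.pt" and takes a
// single 1x3x224x224 float tensor as input.
#include <torch/script.h>

#include <iostream>
#include <vector>

int main() {
  // Deserialize the TorchScript module produced by the conversion step.
  torch::jit::script::Module module = torch::jit::load("end2end.pt");
  module.eval();

  // Build a dummy input; a real pipeline would feed preprocessed image data.
  std::vector<torch::jit::IValue> inputs;
  inputs.emplace_back(torch::ones({1, 3, 224, 224}));

  // Run inference and print the shape of the output tensor.
  at::Tensor output = module.forward(inputs).toTensor();
  std::cout << output.sizes() << '\n';
  return 0;
}
```

Such a snippet would be built against the same libtorch exposed via `Torch_DIR` above, e.g. with `find_package(Torch REQUIRED)` and `target_link_libraries(main ${TORCH_LIBRARIES})` in CMake.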
diff --git a/docs/en/benchmark.md b/docs/en/benchmark.md index 31b26e2742..60331d7bf4 100644 --- a/docs/en/benchmark.md +++ b/docs/en/benchmark.md @@ -690,6 +690,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut MMCls PyTorch + TorchScript ONNX Runtime TensorRT PPLNN @@ -702,6 +703,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Task Metrics fp32 + seresnet fp32 fp32 fp16 @@ -714,6 +716,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Classification top-1 69.90 + 69.90 69.88 69.88 69.86 @@ -724,6 +727,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut top-5 89.43 + 89.43 89.34 89.34 89.33 @@ -737,6 +741,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 77.90 77.90 77.90 + 77.90 - 77.78 77.89 @@ -747,6 +752,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 93.66 93.66 93.66 + 93.66 - 93.64 93.65 @@ -758,6 +764,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 77.74 77.74 77.74 + 77.74 77.75 77.63 77.73 @@ -768,6 +775,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 93.84 93.84 93.84 + 93.84 93.83 93.72 93.84 @@ -780,6 +788,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 68.13 68.13 68.13 + 68.13 67.71 68.11 $MMCLS_DIR/configs/shufflenet_v1/shufflenet_v1_1x_b64x16_linearlr_bn_nowd_imagenet.py @@ -790,6 +799,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 87.81 87.81 87.81 + 87.81 87.58 87.80 @@ -800,6 +810,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 69.55 69.55 69.55 + 69.55 69.54 69.10 69.54 @@ -810,6 +821,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 88.92 88.92 88.92 + 88.92 88.91 88.58 88.92 @@ -821,6 +833,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 71.86 71.86 71.86 + 71.86 71.87 70.91 71.84 @@ -831,6 +844,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 90.42 90.42 90.42 + 90.42 90.40 89.85 90.41 @@ -848,6 +862,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut MMDet Pytorch + TorchScript ONNXRuntime TensorRT PPLNN @@ -864,6 +879,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut fp32 fp32 fp32 + fp32 fp16 int8 fp16 @@ -876,6 +892,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut COCO2017 box AP 33.7 + 33.7 - 33.5 33.5 @@ -890,6 +907,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut COCO2017 box AP 25.5 + 25.5 - 25.5 25.5 @@ -904,6 +922,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut COCO2017 box AP 36.5 + 36.4 - 36.4 36.4 @@ -919,6 +938,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut box AP 36.6 - + - 36.6 36.5 - @@ -932,6 +952,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut COCO2017 box AP 37.4 + 37.4 - 37.4 37.4 @@ -946,6 +967,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut COCO2017 box AP 40.5 + 40.3 - 40.3 40.3 @@ -960,6 +982,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut COCO2017 box AP 37.4 + 37.3 - 37.3 37.3 @@ -975,6 +998,7 @@ Users 
can directly test the performance through [how_to_evaluate_a_model.md](tut box AP 39.4 - + - 39.4 39.4 - @@ -989,6 +1013,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut box AP 40.4 - + - 40.4 40.4 - @@ -996,12 +1021,27 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut - $MMDET_DIR/configs/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco.py + + GFL + Object Detection + COCO2017 + box AP + 40.2 + - + 40.2 + 40.2 + 40.0 + - + - + $MMDET_DIR/configs/gfl/gfl_r50_fpn_1x_coco.py + Mask R-CNN Instance Segmentation COCO2017 box AP 38.2 + 38.1 - 38.1 38.1 @@ -1013,6 +1053,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut mask AP 34.7 + 34.7 - 33.7 33.7 @@ -1033,6 +1074,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut MMEdit Pytorch + TorchScript ONNX Runtime TensorRT PPLNN @@ -1040,7 +1082,6 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut - Model Task @@ -1049,6 +1090,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut fp32 fp32 fp32 + fp32 fp16 int8 fp16 @@ -1060,6 +1102,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Set5 PSNR 28.4316 + 28.4120 28.4323 28.4323 28.4286 @@ -1070,6 +1113,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut SSIM 0.8099 + 0.8106 0.8097 0.8097 0.8096 @@ -1082,6 +1126,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Set5 PSNR 28.2700 + 28.2619 28.2592 28.2592 - @@ -1092,6 +1137,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut SSIM 0.7778 + 0.7784 0.7764 0.7774 - @@ -1104,6 +1150,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Set5 PSNR 30.6428 + 30.6306 30.6444 30.6430 - @@ -1112,8 +1159,9 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut $MMEDIT_DIR/configs/restorers/esrgan/esrgan_psnr_x4c64b23g32_g1_1000k_div2k.py - + SSIM 0.8559 + 0.8565 0.8558 0.8558 - @@ -1126,16 +1174,18 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Set5 PSNR 27.9499 + 27.9252 27.9408 27.9408 - - 27.9388 - $MMEDIT_DIR/configs/restorers/srresnet_srgan/srgan_x4c64b16_g1_1000k_div2k.pyy + $MMEDIT_DIR/configs/restorers/srresnet_srgan/srgan_x4c64b16_g1_1000k_div2k.py SSIM 0.7846 + 0.7851 0.7839 0.7839 - @@ -1148,6 +1198,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Set5 PSNR 30.2252 + 30.2069 30.2300 30.2300 - @@ -1156,8 +1207,9 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut $MMEDIT_DIR/configs/restorers/srresnet_srgan/msrresnet_x4c64b16_g1_1000k_div2k.py - + SSIM 0.8491 + 0.8497 0.8488 0.8488 - @@ -1170,6 +1222,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Set5 PSNR 28.0297 + - 27.7016 27.7016 - @@ -1180,6 +1233,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut SSIM 0.8236 + - 0.8122 0.8122 - @@ -1192,6 +1246,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Set5 PSNR 30.2223 + 30.2192 30.2214 30.2214 30.2211 @@ -1202,6 +1257,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut SSIM 0.8500 + 0.8507 0.8497 0.8497 0.8497 @@ -1221,6 +1277,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 
MMOCR Pytorch + TorchScript ONNXRuntime TensorRT PPLNN @@ -1237,6 +1294,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut fp32 fp32 fp32 + fp32 fp16 int8 fp16 @@ -1249,6 +1307,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut ICDAR2015 recall 0.7310 + 0.7308 0.7304 0.7198 0.7179 @@ -1261,6 +1320,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut precision 0.8714 0.8718 + 0.8714 0.8677 0.8674 0.8688 @@ -1271,6 +1331,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut hmean 0.7950 0.7949 + 0.7950 0.7868 0.7856 0.7821 @@ -1285,6 +1346,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut 0.8067 0.8067 0.8067 + 0.8067 0.8063 0.8067 0.8067 @@ -1297,6 +1359,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut IIIT5K acc 0.9517 + - 0.9287 - - @@ -1318,6 +1381,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut MMSeg Pytorch + TorchScript ONNXRuntime TensorRT PPLNN @@ -1332,6 +1396,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut fp32 fp32 fp32 + fp32 fp16 int8 fp16 @@ -1342,6 +1407,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Cityscapes mIoU 72.25 + 72.36 - 72.36 72.35 @@ -1354,6 +1420,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Cityscapes mIoU 78.55 + 78.66 - 78.26 78.24 @@ -1366,6 +1433,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Cityscapes mIoU 79.09 + 79.12 - 79.12 79.12 @@ -1378,6 +1446,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Cityscapes mIoU 79.61 + 79.60 - 79.60 79.60 @@ -1390,6 +1459,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut Cityscapes mIoU 70.96 + 70.96 - 70.93 70.92 @@ -1403,6 +1473,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](tut mIoU 69.10 - + - 69.10 69.10 68.95 diff --git a/docs/en/build.md b/docs/en/build.md index 005950d1ab..075c5204e7 100644 --- a/docs/en/build.md +++ b/docs/en/build.md @@ -89,6 +89,7 @@ Build the inference engine extension libraries you need. 
- [ncnn](backends/ncnn.md) - [pplnn](backends/pplnn.md) - [OpenVINO](backends/openvino.md) +- [TorchScript](backends/torchscript.md) ### Install mmdeploy diff --git a/docs/en/build/linux.md b/docs/en/build/linux.md new file mode 100644 index 0000000000..1333ed77b7 --- /dev/null +++ b/docs/en/build/linux.md @@ -0,0 +1 @@ +TODO diff --git a/docs/en/build/windows.md b/docs/en/build/windows.md new file mode 100644 index 0000000000..1333ed77b7 --- /dev/null +++ b/docs/en/build/windows.md @@ -0,0 +1 @@ +TODO diff --git a/docs/en/codebases/mmdet.md b/docs/en/codebases/mmdet.md index e5b4f5409d..f03bf7c60f 100644 --- a/docs/en/codebases/mmdet.md +++ b/docs/en/codebases/mmdet.md @@ -22,6 +22,7 @@ Please refer to [get_started.md](https://github.com/open-mmlab/mmdetection/blob/ | Cascade R-CNN | ObjectDetection | Y | Y | N | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) | | Faster R-CNN | ObjectDetection | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn) | | Faster R-CNN + DCN | ObjectDetection | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn) | +| GFL | ObjectDetection | Y | Y | N | ? | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl) | | Cascade Mask R-CNN | InstanceSegmentation | Y | N | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) | | Mask R-CNN | InstanceSegmentation | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn) | diff --git a/docs/en/get_started.md b/docs/en/get_started.md index 9336a06c67..a151c2bb01 100644 --- a/docs/en/get_started.md +++ b/docs/en/get_started.md @@ -49,7 +49,7 @@ Now you can do model inference with the APIs provided by the backend. But what i ```python from mmdeploy.apis import inference_model -result = inference_model(model_cfg, deploy_cfg, backend_models, img=img, device=device) +result = inference_model(model_cfg, deploy_cfg, backend_files, img=img, device=device) ``` The `inference_model` will create a wrapper module and do the inference for you. The result has the same format as the original OpenMMLab repo. diff --git a/docs/en/index.rst b/docs/en/index.rst index bc7e10d91f..717011adb0 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -46,6 +46,7 @@ You can switch between Chinese and English documents in the lower-left corner of backends/openvino.md backends/ncnn.md backends/pplnn.md + backends/torchscript.md .. toctree:: :maxdepth: 1 diff --git a/docs/en/supported_models.md b/docs/en/supported_models.md index 32a44b1da8..fa7cf4f4ea 100644 --- a/docs/en/supported_models.md +++ b/docs/en/supported_models.md @@ -2,64 +2,65 @@ The table below lists the models that are guaranteed to be exportable to other backends. 
-| Model | Codebase | OnnxRuntime | TensorRT | NCNN | PPLNN | OpenVINO | Model config | -|:--------------------------|:-----------------|:-----------:|:--------:|:----:|:-----:|:--------:|:----------------------------------------------------------------------------------------------:| -| RetinaNet | MMDetection | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet) | -| Faster R-CNN | MMDetection | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn) | -| YOLOv3 | MMDetection | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo) | -| YOLOX | MMDetection | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox) | -| FCOS | MMDetection | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos) | -| FSAF | MMDetection | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf) | -| Mask R-CNN | MMDetection | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn) | -| SSD[*](#note) | MMDetection | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd) | -| FoveaBox | MMDetection | Y | N | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox) | -| ATSS | MMDetection | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss) | -| Cascade R-CNN | MMDetection | Y | Y | N | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) | -| Cascade Mask R-CNN | MMDetection | Y | N | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) | -| VFNet | MMDetection | N | N | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/vfnet) | -| ResNet | MMClassification | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnet) | -| ResNeXt | MMClassification | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnext) | -| SE-ResNet | MMClassification | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/seresnet) | -| MobileNetV2 | MMClassification | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/mobilenet_v2) | -| ShuffleNetV1 | MMClassification | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v1) | -| ShuffleNetV2 | MMClassification | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v2) | -| FCN | MMSegmentation | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn) | -| PSPNet[*static](#note) | MMSegmentation | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet) | -| DeepLabV3 | MMSegmentation | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3) | -| DeepLabV3+ | MMSegmentation | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3plus) | -| Fast-SCNN[*static](#note) | MMSegmentation | Y | Y | N | Y | Y | 
[config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fastscnn) | -| UNet[*static](#note) | MMSegmentation | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/unet) | -| ANN[*](#note) | MMSegmentation | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ann) | -| APCNet | MMSegmentation | Y | Y | Y | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/apcnet) | -| BiSeNetV1 | MMSegmentation | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/bisenetv1) | -| BiSeNetV2 | MMSegmentation | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/bisenetv2) | -| CGNet | MMSegmentation | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/cgnet) | -| DMNet | MMSegmentation | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dmnet) | -| DNLNet | MMSegmentation | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dnlnet) | -| EMANet | MMSegmentation | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/emanet) | -| EncNet | MMSegmentation | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/encnet) | -| ERFNet | MMSegmentation | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/erfnet) | -| FastFCN | MMSegmentation | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fastfcn) | -| GCNet | MMSegmentation | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/gcnet) | -| ICNet[*](#note) | MMSegmentation | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/icnet) | -| ISANet | MMSegmentation | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/isanet) | -| NonLocal Net | MMSegmentation | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/nonlocal_net) | -| OCRNet | MMSegmentation | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ocrnet) | -| PointRend | MMSegmentation | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/point_rend) | -| Semantic FPN | MMSegmentation | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/sem_fpn) | -| STDC | MMSegmentation | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/stdc) | -| UPerNet[*](#note) | MMSegmentation | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/upernet) | -| DANet | MMSegmentation | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/danet) | -| SRCNN | MMEditing | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srcnn) | -| ESRGAN | MMEditing | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/esrgan) | -| SRGAN | MMEditing | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srresnet_srgan) | -| SRResNet | MMEditing | Y | Y | Y | Y | Y | 
[config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srresnet_srgan) | -| Real-ESRGAN | MMEditing | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/real_esrgan) | -| EDSR | MMEditing | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/edsr) | -| RDN | MMEditing | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/rdn) | -| DBNet | MMOCR | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textdet/dbnet) | -| CRNN | MMOCR | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/crnn) | -| SAR | MMOCR | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/sar) | +| Model | Codebase | TorchScript | OnnxRuntime | TensorRT | NCNN | PPLNN | OpenVINO | Model config | +|---------------------------|------------------|:-----------:|:-----------:|:--------:|:----:|:-----:|:--------:|------------------------------------------------------------------------------------------------| +| RetinaNet | MMDetection | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet) | +| Faster R-CNN | MMDetection | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn) | +| YOLOv3 | MMDetection | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo) | +| YOLOX | MMDetection | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox) | +| FCOS | MMDetection | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos) | +| FSAF | MMDetection | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf) | +| Mask R-CNN | MMDetection | Y | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn) | +| SSD[*](#note) | MMDetection | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd) | +| FoveaBox | MMDetection | Y | Y | N | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox) | +| ATSS | MMDetection | N | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss) | +| GFL | MMDetection | N | Y | Y | N | ? 
| Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl) | +| Cascade R-CNN | MMDetection | N | Y | Y | N | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) | +| Cascade Mask R-CNN | MMDetection | N | Y | N | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) | +| VFNet | MMDetection | N | N | N | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/vfnet) | +| ResNet | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnet) | +| ResNeXt | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnext) | +| SE-ResNet | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/seresnet) | +| MobileNetV2 | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/mobilenet_v2) | +| ShuffleNetV1 | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v1) | +| ShuffleNetV2 | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v2) | +| FCN | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn) | +| PSPNet[*static](#note) | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet) | +| DeepLabV3 | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3) | +| DeepLabV3+ | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3plus) | +| Fast-SCNN[*static](#note) | MMSegmentation | Y | Y | Y | N | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fastscnn) | +| UNet[*static](#note) | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/unet) | +| ANN[*](#note) | MMSegmentation | ? | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ann) | +| APCNet | MMSegmentation | ? | Y | Y | Y | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/apcnet) | +| BiSeNetV1 | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/bisenetv1) | +| BiSeNetV2 | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/bisenetv2) | +| CGNet | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/cgnet) | +| DMNet | MMSegmentation | ? | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dmnet) | +| DNLNet | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dnlnet) | +| EMANet | MMSegmentation | ? | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/emanet) | +| EncNet | MMSegmentation | ? | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/encnet) | +| ERFNet | MMSegmentation | ? 
| Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/erfnet) | +| FastFCN | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fastfcn) | +| GCNet | MMSegmentation | ? | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/gcnet) | +| ICNet[*](#note) | MMSegmentation | ? | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/icnet) | +| ISANet | MMSegmentation | ? | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/isanet) | +| NonLocal Net | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/nonlocal_net) | +| OCRNet | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ocrnet) | +| PointRend | MMSegmentation | ? | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/point_rend) | +| Semantic FPN | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/sem_fpn) | +| STDC | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/stdc) | +| UPerNet[*](#note) | MMSegmentation | ? | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/upernet) | +| DANet | MMSegmentation | ? | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/danet) | +| SRCNN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srcnn) | +| ESRGAN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/esrgan) | +| SRGAN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srresnet_srgan) | +| SRResNet | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srresnet_srgan) | +| Real-ESRGAN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/real_esrgan) | +| EDSR | MMEditing | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/edsr) | +| RDN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/rdn) | +| DBNet | MMOCR | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textdet/dbnet) | +| CRNN | MMOCR | Y | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/crnn) | +| SAR | MMOCR | N | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/sar) | ### Note diff --git a/docs/en/tutorials/how_to_support_new_backends.md b/docs/en/tutorials/how_to_support_new_backends.md index 85319b8d91..c18cd86148 100644 --- a/docs/en/tutorials/how_to_support_new_backends.md +++ b/docs/en/tutorials/how_to_support_new_backends.md @@ -218,6 +218,7 @@ Although the backend engines are usually implemented in C/C++, it is convenient def _build_wrapper(backend: Backend, backend_files: Sequence[str], device: str, + input_names: Optional[Sequence[str]] = None, output_names: Optional[Sequence[str]] = None): if backend == 
Backend.ONNXRUNTIME:
        from mmdeploy.backend.onnxruntime import ORTWrapper
diff --git a/docs/zh_cn/benchmark.md b/docs/zh_cn/benchmark.md
index 6ba3ababc0..52afc8d1b8 100644
--- a/docs/zh_cn/benchmark.md
+++ b/docs/zh_cn/benchmark.md
@@ -682,6 +682,7 @@ GPU: ncnn, TensorRT, PPLNN
 Users can obtain the performance results they need directly via [how to evaluate a model](tutorials/how_to_evaluate_a_model.md). Below are the results measured in our environment:
+
MMCls
@@ -691,6 +692,7 @@ GPU: ncnn, TensorRT, PPLNN
 MMCls PyTorch + TorchScript ONNX Runtime TensorRT PPLNN
@@ -703,6 +705,7 @@ GPU: ncnn, TensorRT, PPLNN
 Task Metrics fp32 + fp32 fp32 fp32 fp16 int8 fp16
@@ -715,6 +718,7 @@ GPU: ncnn, TensorRT, PPLNN
 Classification top-1 69.90 + 69.90 69.88 69.88 69.86
@@ -725,6 +729,7 @@ GPU: ncnn, TensorRT, PPLNN
 top-5 89.43 + 89.43 89.34 89.34 89.33
@@ -738,6 +743,7 @@ GPU: ncnn, TensorRT, PPLNN
 77.90 77.90 77.90 + 77.90 - 77.78 77.89
@@ -748,6 +754,7 @@ GPU: ncnn, TensorRT, PPLNN
 93.66 93.66 93.66 + 93.66 - 93.64 93.65
@@ -759,6 +766,7 @@ GPU: ncnn, TensorRT, PPLNN
 77.74 77.74 77.74 + 77.74 77.75 77.63 77.73
@@ -769,6 +777,7 @@ GPU: ncnn, TensorRT, PPLNN
 93.84 93.84 93.84 + 93.84 93.83 93.72 93.84
@@ -781,6 +790,7 @@ GPU: ncnn, TensorRT, PPLNN
 68.13 68.13 68.13 + 68.13 67.71 68.11 $MMCLS_DIR/configs/shufflenet_v1/shufflenet_v1_1x_b64x16_linearlr_bn_nowd_imagenet.py
@@ -791,6 +801,7 @@ GPU: ncnn, TensorRT, PPLNN
 87.81 87.81 87.81 + 87.81 87.58 87.80
@@ -801,6 +812,7 @@ GPU: ncnn, TensorRT, PPLNN
 69.55 69.55 69.55 + 69.55 69.54 69.10 69.54
@@ -811,6 +823,7 @@ GPU: ncnn, TensorRT, PPLNN
 88.92 88.92 88.92 + 88.92 88.91 88.58 88.92
@@ -822,6 +835,7 @@ GPU: ncnn, TensorRT, PPLNN
 71.86 71.86 71.86 + 71.86 71.87 70.91 71.84
@@ -832,6 +846,7 @@ GPU: ncnn, TensorRT, PPLNN
 90.42 90.42 90.42 + 90.42 90.40 89.85 90.41
@@ -849,6 +864,7 @@ GPU: ncnn, TensorRT, PPLNN
 MMDet Pytorch + TorchScript ONNXRuntime TensorRT PPLNN
@@ -865,6 +881,7 @@ GPU: ncnn, TensorRT, PPLNN
 fp32 fp32 fp32 + fp32 fp16 int8 fp16
@@ -877,6 +894,7 @@ GPU: ncnn, TensorRT, PPLNN
 COCO2017 box AP 33.7 + 33.7 - 33.5 33.5
@@ -891,6 +909,7 @@ GPU: ncnn, TensorRT, PPLNN
 COCO2017 box AP 25.5 + 25.5 - 25.5 25.5
@@ -905,6 +924,7 @@ GPU: ncnn, TensorRT, PPLNN
 COCO2017 box AP 36.5 + 36.4 - 36.4 36.4
@@ -920,6 +940,7 @@ GPU: ncnn, TensorRT, PPLNN
 box AP 36.6 - + - 36.6 36.5 -
@@ -933,6 +954,7 @@ GPU: ncnn, TensorRT, PPLNN
 COCO2017 box AP 37.4 + 37.4 - 37.4 37.4
@@ -947,6 +969,7 @@ GPU: ncnn, TensorRT, PPLNN
 COCO2017 box AP 40.5 + 40.3 - 40.3 40.3
@@ -961,6 +984,7 @@ GPU: ncnn, TensorRT, PPLNN
 COCO2017 box AP 37.4 + 37.3 - 37.3 37.3
@@ -976,6 +1000,7 @@ GPU: ncnn, TensorRT, PPLNN
 box AP 39.4 - + - 39.4 39.4 -
@@ -990,6 +1015,7 @@ GPU: ncnn, TensorRT, PPLNN
 box AP 40.4 - + - 40.4 40.4 -
@@ -1003,6 +1029,7 @@ GPU: ncnn, TensorRT, PPLNN
 COCO2017 box AP 38.2 + 38.1 - 38.1 38.1
@@ -1014,6 +1041,7 @@ GPU: ncnn, TensorRT, PPLNN
 mask AP 34.7 + 34.7 - 33.7 33.7
@@ -1034,6 +1062,7 @@ GPU: ncnn, TensorRT, PPLNN
 MMEdit Pytorch + TorchScript ONNX Runtime TensorRT PPLNN
@@ -1041,7 +1070,6 @@ GPU: ncnn, TensorRT, PPLNN
 - Model Task
@@ -1050,6 +1078,7 @@ GPU: ncnn, TensorRT, PPLNN
 fp32 fp32 fp32 + fp32 fp16 int8 fp16
@@ -1061,6 +1090,7 @@ GPU: ncnn, TensorRT, PPLNN
 Set5 PSNR 28.4316 + 28.4120 28.4323 28.4323 28.4286
@@ -1071,6 +1101,7 @@ GPU: ncnn, TensorRT, PPLNN
 SSIM 0.8099 + 0.8106 0.8097 0.8097 0.8096
@@ -1083,6 +1114,7 @@ GPU: ncnn, TensorRT, PPLNN
 Set5 PSNR 28.2700 + 28.2619 28.2592 28.2592 -
@@ -1093,6 +1125,7 @@ GPU: ncnn, TensorRT, PPLNN
 SSIM 0.7778 + 0.7784 0.7764 0.7774 -
@@ -1105,6 +1138,7 @@ GPU: ncnn, TensorRT, PPLNN
 Set5 PSNR 30.6428 + 30.6306 30.6444 30.6430 -
@@ -1113,8 +1147,9 @@ GPU: ncnn, TensorRT, PPLNN
 $MMEDIT_DIR/configs/restorers/esrgan/esrgan_psnr_x4c64b23g32_g1_1000k_div2k.py
- 
+ SSIM 0.8559 + 0.8565 0.8558 0.8558 -
@@ -1127,16 +1162,18 @@ GPU: ncnn, TensorRT, PPLNN
 Set5 PSNR 27.9499 + 27.9252 27.9408 27.9408 - - 27.9388
- $MMEDIT_DIR/configs/restorers/srresnet_srgan/srgan_x4c64b16_g1_1000k_div2k.pyy
+
$MMEDIT_DIR/configs/restorers/srresnet_srgan/srgan_x4c64b16_g1_1000k_div2k.py
 SSIM 0.7846 + 0.7851 0.7839 0.7839 -
@@ -1149,6 +1186,7 @@ GPU: ncnn, TensorRT, PPLNN
 Set5 PSNR 30.2252 + 30.2069 30.2300 30.2300 -
@@ -1157,8 +1195,9 @@ GPU: ncnn, TensorRT, PPLNN
 $MMEDIT_DIR/configs/restorers/srresnet_srgan/msrresnet_x4c64b16_g1_1000k_div2k.py
- 
+ SSIM 0.8491 + 0.8497 0.8488 0.8488 -
@@ -1171,6 +1210,7 @@ GPU: ncnn, TensorRT, PPLNN
 Set5 PSNR 28.0297 + - 27.7016 27.7016 -
@@ -1181,6 +1221,7 @@ GPU: ncnn, TensorRT, PPLNN
 SSIM 0.8236 + - 0.8122 0.8122 -
@@ -1193,6 +1234,7 @@ GPU: ncnn, TensorRT, PPLNN
 Set5 PSNR 30.2223 + 30.2192 30.2214 30.2214 30.2211
@@ -1203,6 +1245,7 @@ GPU: ncnn, TensorRT, PPLNN
 SSIM 0.8500 + 0.8507 0.8497 0.8497 0.8497
@@ -1222,6 +1265,7 @@ GPU: ncnn, TensorRT, PPLNN
 MMOCR Pytorch + TorchScript ONNXRuntime TensorRT PPLNN
@@ -1238,6 +1282,7 @@ GPU: ncnn, TensorRT, PPLNN
 fp32 fp32 fp32 + fp32 fp16 int8 fp16
@@ -1250,6 +1295,7 @@ GPU: ncnn, TensorRT, PPLNN
 ICDAR2015 recall 0.7310 + 0.7308 0.7304 0.7198 0.7179
@@ -1262,6 +1308,7 @@ GPU: ncnn, TensorRT, PPLNN
 precision 0.8714 0.8718 + 0.8714 0.8677 0.8674 0.8688
@@ -1272,6 +1319,7 @@ GPU: ncnn, TensorRT, PPLNN
 hmean 0.7950 0.7949 + 0.7950 0.7868 0.7856 0.7821
@@ -1286,6 +1334,7 @@ GPU: ncnn, TensorRT, PPLNN
 0.8067 0.8067 0.8067 + 0.8067 0.8063 0.8067 0.8067
@@ -1298,6 +1347,7 @@ GPU: ncnn, TensorRT, PPLNN
 IIIT5K acc 0.9517 + - 0.9287 - -
@@ -1319,6 +1369,7 @@ GPU: ncnn, TensorRT, PPLNN
 MMSeg Pytorch + TorchScript ONNXRuntime TensorRT PPLNN
@@ -1333,6 +1384,7 @@ GPU: ncnn, TensorRT, PPLNN
 fp32 fp32 fp32 + fp32 fp16 int8 fp16
@@ -1343,6 +1395,7 @@ GPU: ncnn, TensorRT, PPLNN
 Cityscapes mIoU 72.25 + 72.36 - 72.36 72.35
@@ -1355,6 +1408,7 @@ GPU: ncnn, TensorRT, PPLNN
 Cityscapes mIoU 78.55 + 78.66 - 78.26 78.24
@@ -1367,6 +1421,7 @@ GPU: ncnn, TensorRT, PPLNN
 Cityscapes mIoU 79.09 + 79.12 - 79.12 79.12
@@ -1379,6 +1434,7 @@ GPU: ncnn, TensorRT, PPLNN
 Cityscapes mIoU 79.61 + 79.60 - 79.60 79.60
@@ -1391,6 +1447,7 @@ GPU: ncnn, TensorRT, PPLNN
 Cityscapes mIoU 70.96 + 70.96 - 70.93 70.92
@@ -1404,6 +1461,7 @@ GPU: ncnn, TensorRT, PPLNN
 mIoU 69.10 - + - 69.10 69.10 68.95
diff --git a/docs/zh_cn/build.md b/docs/zh_cn/build.md
index 3e0717ddc8..8093cbb0b0 100644
--- a/docs/zh_cn/build.md
+++ b/docs/zh_cn/build.md
@@ -87,6 +87,7 @@
 - [ncnn](https://mmdeploy.readthedocs.io/en/latest/backends/ncnn.html)
 - [PPLNN](https://mmdeploy.readthedocs.io/en/latest/backends/pplnn.html)
 - [OpenVINO](https://mmdeploy.readthedocs.io/en/latest/backends/openvino.html)
+- [TorchScript](https://mmdeploy.readthedocs.io/en/latest/backends/torchscript.html)
 ### Install MMDeploy
diff --git a/docs/zh_cn/build/linux.md b/docs/zh_cn/build/linux.md
new file mode 100644
index 0000000000..1333ed77b7
--- /dev/null
+++ b/docs/zh_cn/build/linux.md
@@ -0,0 +1 @@
+TODO
diff --git a/docs/zh_cn/build/windows.md b/docs/zh_cn/build/windows.md
new file mode 100644
index 0000000000..e0bcdfbc37
--- /dev/null
+++ b/docs/zh_cn/build/windows.md
@@ -0,0 +1,336 @@
+- [Build MMDeploy on Windows](#build-mmdeploy-on-windows)
+  - [Build from source](#build-from-source)
+    - [Install build and compilation toolchain](#install-build-and-compilation-toolchain)
+    - [Install dependencies](#install-dependencies)
+      - [Install dependencies for MMDeploy Converter](#install-dependencies-for-mmdeploy-converter)
+      - [Install dependencies for MMDeploy SDK](#install-dependencies-for-mmdeploy-sdk)
+      - [Install inference engines](#install-inference-engines)
+    - [Build MMDeploy](#build-mmdeploy)
+      - [Build and install Model Converter](#build-and-install-model-converter)
+        - [Build custom ops](#build-custom-ops)
+        - [Install Model Converter](#install-model-converter)
+      - [Build SDK](#build-sdk)
+        - [Build options](#build-options)
+        - [Build examples](#build-examples)
+        - [Build SDK demo](#build-sdk-demo)
+    - [Notes](#notes)
+
+---
+# Build MMDeploy on Windows
+
+Currently, MMDeploy only supports building and installing from source on the Windows platform. Prebuilt packages will be provided in the future.
+
+## Build from source
+
+All of the installation steps below were carried out on **Windows 10**.
+
+### Install build and compilation toolchain
+
+1. Download and install [Visual Studio 2019](https://visualstudio.microsoft.com). During installation, select "Desktop development with C++" and "Windows 10 SDK".
+2. Add the cmake path to the PATH environment variable, e.g. "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin".
+3. If an NVIDIA GPU is installed in the system, download and install the CUDA toolkit following the official instructions.
+
+### Install dependencies
+
+#### Install dependencies for MMDeploy Converter
+
+| Name | Installation |
+|:-----|:-------------|
+| conda | We strongly recommend installing conda, or miniconda, e.g. from https://repo.anaconda.com/miniconda/Miniconda3-py37_4.11.0-Windows-x86_64.exe. After installation, open the Windows start menu, type "prompt", and open the Anaconda PowerShell Prompt. All installation commands below were verified in Anaconda PowerShell. |
+| pytorch (>=1.8.0) | Refer to the PyTorch official website and choose a prebuilt package that matches your environment, e.g.<br>`pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html` |
+| mmcv-full | Refer to the mmcv official website and choose a prebuilt package that matches your environment, e.g.<br>`$env:cu_version="cu111"`<br>`$env:torch_version="torch1.8.0"`<br>`pip install mmcv-full==1.4.0 -f https://download.openmmlab.com/mmcv/dist/$env:cu_version/$env:torch_version/index.html` |
+
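+As an optional sanity check (a sketch, not an official installation step), you can confirm from Python that the converter dependencies above are importable; the versions shown are only the examples used in this document:
+
+```python
+# Verify the converter environment; version numbers are only examples and
+# should match whichever prebuilt packages you actually installed above.
+import mmcv
+import torch
+
+print(torch.__version__)          # e.g. 1.8.1+cu111
+print(mmcv.__version__)           # e.g. 1.4.0
+print(torch.cuda.is_available())  # True if the CUDA toolkit is set up
+```
+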
+#### Install dependencies for MMDeploy SDK
+
+| Name | Installation |
+|:-----|:-------------|
+| spdlog | spdlog is a small, fast logging library. Install it as follows:<br>1. Download https://github.com/gabime/spdlog/archive/refs/tags/v1.9.2.zip<br>2. Unzip it and enter the folder spdlog-v1.9.2<br>3. Build and install:<br>`mkdir build`<br>`cd build`<br>`cmake .. -G "Visual Studio 16 2019" -A x64 -T v142 -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_INSTALL_PREFIX=install -DCMAKE_BUILD_TYPE=Release`<br>`cmake --build . --target install -j --config Release` |
+| OpenCV | 1. Download and install the OpenCV prebuilt package for Windows: https://github.com/opencv/opencv/releases/download/4.5.5/opencv-4.5.5-vc14_vc15.exe<br>2. Add the path of the OpenCV libraries to the PATH environment variable |
+| pplcv | pplcv is a high-performance image processing library for x86 and CUDA platforms. It is optional and only required for the CUDA platform. Currently, v0.6.2 must be used, and the source has to be fetched with git clone and built manually:<br>`git clone --recursive git@github.com:openppl-public/ppl.cv.git`<br>`cd ppl.cv`<br>`git checkout tags/v0.6.2 -b v0.6.2`<br>`./build.bat -G "Visual Studio 16 2019" -T v142 -A x64 -DHPCC_USE_CUDA=ON -DHPCC_MSVC_MD=ON` |
+
+#### Install inference engines
+
+Currently, MMDeploy supports two inference engines on the Windows platform: ONNX Runtime and TensorRT. Other engines have not been verified yet, or their verification did not pass; support for them will be added over time.
+
+| Inference engine | Package | Installation |
+|:-----------------|:--------|:-------------|
+| ONNXRuntime | onnxruntime | 1. Download the binary package: https://github.com/microsoft/onnxruntime/releases/download/v1.8.0/onnxruntime-win-x64-1.8.0.zip<br>2. Unzip it to a target path, referred to as onnxruntime_dir below<br>3. Add the onnxruntime libs path to PATH:<br>`$env:path = "{onnxruntime_dir}/lib;" + $env:path` |
+| TensorRT | TensorRT | 1. Download the binary package from the NVIDIA website, e.g. https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.2.3.0/zip/TensorRT-8.2.3.0.Windows10.x86_64.cuda-11.4.cudnn8.2.zip<br>2. Unzip it to a target path, referred to as tensorrt_dir below<br>3. Install the TensorRT python package<br>4. Add the tensorrt libs path to PATH:<br>`pip install {tensorrt_dir}/python/tensorrt-8.2.3.0-cp37-none-win_amd64.whl`<br>`$env:path = "{tensorrt_dir}/lib;" + $env:path` |
+| | cudnn | 1. Download the binary package from the NVIDIA website, e.g. https://developer.nvidia.com/compute/machine-learning/cudnn/secure/8.2.1.32/11.3_06072021/cudnn-11.3-windows-x64-v8.2.1.32.zip<br>2. Unzip it to a target path, referred to as cudnn_dir below<br>3. Add the cudnn libs path to PATH:<br>`$env:path = "{cudnn_dir}/bin;" + $env:path` |
+| PPL.NN | ppl.nn | TODO |
+| OpenVINO | openvino | TODO |
+| ncnn | ncnn | TODO |
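+
+Before building, it may help to verify that the engine Python packages installed above are importable (an optional sketch, assuming ONNX Runtime 1.8.0 and TensorRT 8.2.3.0 were installed as described):
+
+```python
+# A failed import here usually means the wheel was not installed or PATH
+# does not yet contain the engine's lib directory.
+import onnxruntime
+import tensorrt
+
+print(onnxruntime.get_device())  # 'CPU' or 'GPU'
+print(tensorrt.__version__)      # e.g. 8.2.3.0
+```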
+
+### Build MMDeploy
+
+#### Build and install Model Converter
+
+##### Build custom ops
+
+- **ONNXRuntime** custom ops
+
+```powershell
+mkdir build
+cd build
+cmake .. -G "Visual Studio 16 2019" -A x64 -T v142 -DMMDEPLOY_TARGET_BACKENDS="ort" -DONNXRUNTIME_DIR={onnxruntime_dir}
+cmake --build . --config Release -- /maxcpucount:4
+```
+
+- **TensorRT** custom ops
+
+```powershell
+mkdir build
+cd build
+cmake .. -G "Visual Studio 16 2019" -A x64 -T v142 -DMMDEPLOY_TARGET_BACKENDS="trt" -DTENSORRT_DIR={tensorrt_dir} -DCUDNN_DIR={cudnn_dir}
+cmake --build . --config Release -- /maxcpucount:4
+```
+
+- **ncnn** custom ops
+
+  TODO
+
+##### Install Model Converter
+
+```powershell
+cd root/path/of/MMDeploy
+pip install -e .
+```
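+
+Optionally, check that the custom-ops libraries built above can be located by the converter. The helpers below exist in this repository and return an empty string when a library is not found:
+
+```python
+# Print the discovered custom-ops library paths; an empty line means the
+# corresponding ops library was not built or is not in the expected place.
+from mmdeploy.backend.onnxruntime.init_plugins import get_ops_path as ort_ops_path
+from mmdeploy.backend.tensorrt.init_plugins import get_ops_path as trt_ops_path
+
+print(ort_ops_path())
+print(trt_ops_path())
+```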
+
+#### Build SDK
+
+##### Build options
+
+| Option | Values | Default | Description |
+|:-------|:-------|:--------|:------------|
+| MMDEPLOY_BUILD_SDK | {ON, OFF} | OFF | Switch to build the MMDeploy SDK |
+| MMDEPLOY_BUILD_SDK_PYTHON_API | {ON, OFF} | OFF | Switch to build the MMDeploy SDK python package |
+| MMDEPLOY_BUILD_TEST | {ON, OFF} | OFF | Switch to build the MMDeploy SDK test programs |
+| MMDEPLOY_TARGET_DEVICES | {"cpu", "cuda"} | cpu | Set the target devices. Separate device names with semicolons when there is more than one, e.g. -DMMDEPLOY_TARGET_DEVICES="cpu;cuda" |
+| MMDEPLOY_TARGET_BACKENDS | {"trt", "ort", "pplnn", "ncnn", "openvino"} | N/A | By default the SDK enables no backend, since the choice is highly application-specific. Separate backends with semicolons when selecting several, e.g. -DMMDEPLOY_TARGET_BACKENDS="trt;ort;pplnn;ncnn;openvino". At build time, almost every backend needs some environment variables so that its dependencies can be found:<br>1. trt: TensorRT; set TENSORRT_DIR and CUDNN_DIR, e.g. -DTENSORRT_DIR={tensorrt_dir} -DCUDNN_DIR={cudnn_dir}<br>2. ort: ONNXRuntime; set ONNXRUNTIME_DIR, e.g. -DONNXRUNTIME_DIR={onnxruntime_dir}<br>3. pplnn: PPL.NN; set pplnn_DIR (not verified in the current version)<br>4. ncnn: set ncnn_DIR (not verified in the current version)<br>5. openvino: OpenVINO; set InferenceEngine_DIR (not verified in the current version) |
+| MMDEPLOY_CODEBASES | {"mmcls", "mmdet", "mmseg", "mmedit", "mmocr", "all"} | N/A | Set the SDK post-processing components, which load the post-processing logic of the OpenMMLab codebases. Supported codebases are 'mmcls', 'mmdet', 'mmedit', 'mmseg' and 'mmocr'. Separate multiple codebases with semicolons, e.g. -DMMDEPLOY_CODEBASES="mmcls;mmdet"; use -DMMDEPLOY_CODEBASES=all to load all of them. |
+| BUILD_SHARED_LIBS | {ON, OFF} | ON | Switch between shared and static libraries. Set OFF to build static libraries |
+
+##### Build examples
+
+The following two examples show how to build the SDK for different runtime environments.
+
+- cpu + ONNXRuntime
+
+  ```PowerShell
+  mkdir build
+  cd build
+  cmake .. -G "Visual Studio 16 2019" -A x64 -T v142 `
+    -DMMDEPLOY_BUILD_SDK=ON `
+    -DMMDEPLOY_TARGET_DEVICES="cpu" `
+    -DMMDEPLOY_TARGET_BACKENDS="ort" `
+    -DMMDEPLOY_CODEBASES="all" `
+    -DONNXRUNTIME_DIR={onnxruntime_dir} `
+    -Dspdlog_DIR={spdlog_dir}/build/install/lib/cmake/spdlog `
+    -DOpenCV_DIR={opencv_dir}/build
+  cmake --build . --config Release -- /maxcpucount:4
+  cmake --install . --config Release
+  ```
+
+- cuda + TensorRT
+
+  ```PowerShell
+  mkdir build
+  cd build
+  cmake .. -G "Visual Studio 16 2019" -A x64 -T v142 `
+    -DMMDEPLOY_BUILD_SDK=ON `
+    -DMMDEPLOY_TARGET_DEVICES="cuda" `
+    -DMMDEPLOY_TARGET_BACKENDS="trt" `
+    -DMMDEPLOY_CODEBASES="all" `
+    -Dpplcv_DIR={pplcv_dir}/pplcv-build/install/lib/cmake/ppl `
+    -DTENSORRT_DIR={tensorrt_dir} `
+    -DCUDNN_DIR={cudnn_dir} `
+    -Dspdlog_DIR={spdlog_dir}/build/install/lib/cmake/spdlog `
+    -DOpenCV_DIR={opencv_dir}/build
+  cmake --build . --config Release -- /maxcpucount:4
+  cmake --install . --config Release
+  ```
+
+- others
+
+  Refer to the two examples above, together with the SDK build options described earlier, to build the SDK for other runtime environments.
+
+##### Build SDK demo
+
+```PowerShell
+cd install/example
+mkdir build
+cd build
+cmake .. -G "Visual Studio 16 2019" -A x64 -T v142 `
+  -DMMDeploy_DIR={mmdeploy_dir}/build/install/lib/cmake/MMDeploy `
+  -Dspdlog_DIR={spdlog_dir}/build/install/lib/cmake/spdlog `
+  -DOpenCV_DIR={opencv_dir}/build
+cmake --build . --config Release -- /maxcpucount:4
+
+$env:path = "${mmdeploy_dir}/build/install/bin;" + $env:path
+```
+
+### Notes
+
+1. Release and Debug libraries must not be mixed. If MMDeploy is built in Debug mode, all third-party dependencies have to be Debug versions as well.
diff --git a/docs/zh_cn/get_started.md b/docs/zh_cn/get_started.md
index 414bc481d3..f817128296 100644
--- a/docs/zh_cn/get_started.md
+++ b/docs/zh_cn/get_started.md
@@ -49,7 +49,7 @@ python ${MMDEPLOY_DIR}/tools/deploy.py \
 ```python
 from mmdeploy.apis import inference_model
-result = inference_model(model_cfg, deploy_cfg, backend_models, img=img, device=device)
+result = inference_model(model_cfg, deploy_cfg, backend_files, img=img, device=device)
 ```
 `inference_model` creates a wrapper around the backend model and runs inference through it. The results keep the same format as those of the original OpenMMLab model.
diff --git a/mmdeploy/apis/__init__.py b/mmdeploy/apis/__init__.py
index 70e6e479d5..48b1339d15 100644
--- a/mmdeploy/apis/__init__.py
+++ b/mmdeploy/apis/__init__.py
@@ -3,11 +3,12 @@
 from .extract_model import extract_model
 from .inference import inference_model
 from .pytorch2onnx import torch2onnx, torch2onnx_impl
+from .pytorch2torchscript import torch2torchscript, torch2torchscript_impl
 from .utils import build_task_processor, get_predefined_partition_cfg
 from .visualize import visualize_model

 __all__ = [
     'create_calib_table', 'extract_model', 'inference_model', 'torch2onnx',
-    'torch2onnx_impl', 'build_task_processor', 'get_predefined_partition_cfg',
-    'visualize_model'
+    'torch2onnx_impl', 'torch2torchscript', 'torch2torchscript_impl',
+    'build_task_processor', 'get_predefined_partition_cfg', 'visualize_model'
 ]
diff --git a/mmdeploy/apis/pytorch2torchscript.py b/mmdeploy/apis/pytorch2torchscript.py
new file mode 100644
index 0000000000..c984892360
--- /dev/null
+++ b/mmdeploy/apis/pytorch2torchscript.py
@@ -0,0 +1,111 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp +from typing import Any, Optional, Sequence, Union + +import mmcv +import torch +from packaging.version import parse as version_parse + +from mmdeploy.backend.torchscript import get_ops_path +from mmdeploy.core import RewriterContext, patch_model +from mmdeploy.utils import (IR, get_backend, get_input_shape, get_root_logger, + load_config) + + +def torch2torchscript_impl(model: torch.nn.Module, + inputs: Union[torch.Tensor, Sequence[torch.Tensor]], + deploy_cfg: Union[str, + mmcv.Config], output_file: str): + """Converting torch model to torchscript. + + Args: + model (torch.nn.Module): Input pytorch model. + inputs (torch.Tensor | Sequence[torch.Tensor]): Input tensors used to + convert model. + deploy_cfg (str | mmcv.Config): Deployment config file or + Config object. + output_file (str): Output file to save torchscript model. + """ + # load custom ops if exist + custom_ops_path = get_ops_path() + if osp.exists(custom_ops_path): + torch.ops.load_library(custom_ops_path) + + deploy_cfg = load_config(deploy_cfg)[0] + + backend = get_backend(deploy_cfg).value + + patched_model = patch_model(model, cfg=deploy_cfg, backend=backend) + + with RewriterContext( + cfg=deploy_cfg, backend=backend, + ir=IR.TORCHSCRIPT), torch.no_grad(), torch.jit.optimized_execution( + True): + # for exporting models with weight that depends on inputs + patched_model( + *inputs) if isinstance(inputs, Sequence) else patched_model(inputs) + ts_model = torch.jit.trace(patched_model, inputs) + + # perform optimize, note that optimizing models may trigger errors when + # loading the saved .pt file, as described in + # https://github.com/pytorch/pytorch/issues/62706 + logger = get_root_logger() + logger.info('perform torchscript optimizer.') + try: + # custom optimizer + from mmdeploy.backend.torchscript import ts_optimizer + logger = get_root_logger() + ts_optimizer.optimize_for_backend( + ts_model._c, ir=IR.TORCHSCRIPT.value, backend=backend) + except Exception: + # use pytorch builtin optimizer + ts_model = torch.jit.freeze(ts_model) + torch_version = version_parse(torch.__version__) + if torch_version.minor >= 9: + ts_model = torch.jit.optimize_for_inference(ts_model) + + # save model + torch.jit.save(ts_model, output_file) + + +def torch2torchscript(img: Any, + work_dir: str, + save_file: str, + deploy_cfg: Union[str, mmcv.Config], + model_cfg: Union[str, mmcv.Config], + model_checkpoint: Optional[str] = None, + device: str = 'cuda:0'): + """Convert PyTorch model to torchscript model. + + Args: + img (str | np.ndarray | torch.Tensor): Input image used to assist + converting model. + work_dir (str): A working directory to save files. + save_file (str): Filename to save torchscript model. + deploy_cfg (str | mmcv.Config): Deployment config file or + Config object. + model_cfg (str | mmcv.Config): Model config file or Config object. + model_checkpoint (str): A checkpoint path of PyTorch model, + defaults to `None`. + device (str): A string specifying device type, defaults to 'cuda:0'. 
+ """ + # load deploy_cfg if necessary + deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg) + mmcv.mkdir_or_exist(osp.abspath(work_dir)) + output_file = osp.join(work_dir, save_file) + + input_shape = get_input_shape(deploy_cfg) + + from mmdeploy.apis import build_task_processor + task_processor = build_task_processor(model_cfg, deploy_cfg, device) + + torch_model = task_processor.init_pytorch_model(model_checkpoint) + _, model_inputs = task_processor.create_input(img, input_shape) + if not isinstance(model_inputs, torch.Tensor): + model_inputs = model_inputs[0] + + torch2torchscript_impl( + torch_model, + model_inputs, + deploy_cfg=deploy_cfg, + output_file=output_file) diff --git a/mmdeploy/backend/ncnn/init_plugins.py b/mmdeploy/backend/ncnn/init_plugins.py index 97667defd4..10f9ca2655 100644 --- a/mmdeploy/backend/ncnn/init_plugins.py +++ b/mmdeploy/backend/ncnn/init_plugins.py @@ -1,7 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. -import glob import os +from mmdeploy.utils import get_file_path + def get_ops_path() -> str: """Get NCNN custom ops library path. @@ -9,14 +10,11 @@ def get_ops_path() -> str: Returns: str: The library path of NCNN custom ops. """ - wildcard = os.path.abspath( - os.path.join( - os.path.dirname(__file__), - '../../../build/lib/libmmdeploy_ncnn_ops.so')) - - paths = glob.glob(wildcard) - lib_path = paths[0] if len(paths) > 0 else '' - return lib_path + candidates = [ + '../../../build/lib/libmmdeploy_ncnn_ops.so', + '../../../build/bin/*/mmdeploy_ncnn_ops.dll' + ] + return get_file_path(os.path.dirname(__file__), candidates) def get_onnx2ncnn_path() -> str: @@ -25,10 +23,7 @@ def get_onnx2ncnn_path() -> str: Returns: str: A path of onnx2ncnn tool. """ - wildcard = os.path.abspath( - os.path.join( - os.path.dirname(__file__), '../../../build/bin/onnx2ncnn')) - - paths = glob.glob(wildcard) - lib_path = paths[0] if len(paths) > 0 else '' - return lib_path + candidates = [ + '../../../build/bin/onnx2ncnn', '../../../build/bin/*/onnx2ncnn.exe' + ] + return get_file_path(os.path.dirname(__file__), candidates) diff --git a/mmdeploy/backend/onnxruntime/init_plugins.py b/mmdeploy/backend/onnxruntime/init_plugins.py index 06cd001502..e8622eedf3 100644 --- a/mmdeploy/backend/onnxruntime/init_plugins.py +++ b/mmdeploy/backend/onnxruntime/init_plugins.py @@ -1,7 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. -import glob import os +from mmdeploy.utils import get_file_path + def get_ops_path() -> str: """Get the library path of onnxruntime custom ops. @@ -9,11 +10,8 @@ def get_ops_path() -> str: Returns: str: The library path to onnxruntime custom ops. 
""" - wildcard = os.path.abspath( - os.path.join( - os.path.dirname(__file__), - '../../../build/lib/libmmdeploy_onnxruntime_ops.so')) - - paths = glob.glob(wildcard) - lib_path = paths[0] if len(paths) > 0 else '' - return lib_path + candidates = [ + '../../../build/lib/libmmdeploy_onnxruntime_ops.so', + '../../../build/bin/*/mmdeploy_onnxruntime_ops.dll', + ] + return get_file_path(os.path.dirname(__file__), candidates) diff --git a/mmdeploy/backend/sdk/__init__.py b/mmdeploy/backend/sdk/__init__.py index 95a9548505..ef648c4d5b 100644 --- a/mmdeploy/backend/sdk/__init__.py +++ b/mmdeploy/backend/sdk/__init__.py @@ -3,14 +3,24 @@ import os import sys -lib_dir = os.path.abspath( - os.path.join(os.path.dirname(__file__), '../../../build/lib')) - -sys.path.insert(0, lib_dir) +from mmdeploy.utils import get_file_path _is_available = False -if importlib.util.find_spec('mmdeploy_python') is not None: +module_name = 'mmdeploy_python' + +candidates = [ + f'../../../build/lib/{module_name}.*.so', + f'../../../build/bin/*/{module_name}.*.pyd' +] + +lib_path = get_file_path(os.path.dirname(__file__), candidates) + +if lib_path: + lib_dir = os.path.dirname(lib_path) + sys.path.insert(0, lib_dir) + +if importlib.util.find_spec(module_name) is not None: from .wrapper import SDKWrapper __all__ = ['SDKWrapper'] _is_available = True diff --git a/mmdeploy/backend/tensorrt/init_plugins.py b/mmdeploy/backend/tensorrt/init_plugins.py index 9bb0da7f43..80c6eea4d7 100644 --- a/mmdeploy/backend/tensorrt/init_plugins.py +++ b/mmdeploy/backend/tensorrt/init_plugins.py @@ -1,9 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. import ctypes -import glob import os -from mmdeploy.utils import get_root_logger +from mmdeploy.utils import get_file_path, get_root_logger def get_ops_path() -> str: @@ -12,14 +11,11 @@ def get_ops_path() -> str: Returns: str: A path of the TensorRT plugin library. """ - wildcard = os.path.abspath( - os.path.join( - os.path.dirname(__file__), - '../../../build/lib/libmmdeploy_tensorrt_ops.so')) - - paths = glob.glob(wildcard) - lib_path = paths[0] if len(paths) > 0 else '' - return lib_path + candidates = [ + '../../../build/lib/libmmdeploy_tensorrt_ops.so', + '../../../build/bin/*/mmdeploy_tensorrt_ops.dll' + ] + return get_file_path(os.path.dirname(__file__), candidates) def load_tensorrt_plugin() -> bool: diff --git a/mmdeploy/backend/torchscript/__init__.py b/mmdeploy/backend/torchscript/__init__.py new file mode 100644 index 0000000000..9179ef3da6 --- /dev/null +++ b/mmdeploy/backend/torchscript/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# flake8: noqa +from .init_plugins import get_ops_path, ops_available + + +def is_available(): + """Torchscript available. + + Returns: + bool: Always True. + """ + return True + + +__all__ = ['get_ops_path', 'ops_available'] + +if is_available(): + from .wrapper import TorchscriptWrapper + + __all__ += ['TorchscriptWrapper'] diff --git a/mmdeploy/backend/torchscript/init_plugins.py b/mmdeploy/backend/torchscript/init_plugins.py new file mode 100644 index 0000000000..ec0371b59a --- /dev/null +++ b/mmdeploy/backend/torchscript/init_plugins.py @@ -0,0 +1,28 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import glob +import os.path as osp + + +def get_ops_path() -> str: + """Get path of the torchscript extension library. + + Returns: + str: A path of the torchscript extension library. 
+    """
+    wildcard = osp.abspath(
+        osp.join(
+            osp.dirname(__file__),
+            '../../../build/lib/libmmdeploy_torchscript_ops.so'))
+
+    paths = glob.glob(wildcard)
+    lib_path = paths[0] if len(paths) > 0 else ''
+    return lib_path
+
+
+def ops_available() -> bool:
+    """Return whether ops are available.
+
+    Returns:
+        bool: Whether ops are available.
+    """
+    return osp.exists(get_ops_path())
diff --git a/mmdeploy/backend/torchscript/wrapper.py b/mmdeploy/backend/torchscript/wrapper.py
new file mode 100644
index 0000000000..668ab23aa0
--- /dev/null
+++ b/mmdeploy/backend/torchscript/wrapper.py
@@ -0,0 +1,108 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+from typing import Dict, Optional, Sequence, Union
+
+import torch
+
+from mmdeploy.utils import Backend
+from mmdeploy.utils.timer import TimeCounter
+from ..base import BACKEND_WRAPPER, BaseWrapper
+from .init_plugins import get_ops_path
+
+
+@BACKEND_WRAPPER.register_module(Backend.TORCHSCRIPT.value)
+class TorchscriptWrapper(BaseWrapper):
+    """Torchscript engine wrapper for inference.
+
+    Args:
+        model (torch.jit.RecursiveScriptModule): torchscript engine to wrap.
+        input_names (Sequence[str] | None): Names of model inputs in order.
+            Defaults to `None` and the wrapper will accept list or Tensor.
+        output_names (Sequence[str] | None): Names of model outputs in order.
+            Defaults to `None` and the wrapper will return list or Tensor.
+
+    Note:
+        If the engine is converted from an onnx model, the input_names and
+        output_names should be the same as those of the onnx model.
+
+    Examples:
+        >>> from mmdeploy.backend.torchscript import TorchscriptWrapper
+        >>> engine_file = 'resnet.engine'
+        >>> model = TorchscriptWrapper(engine_file, input_names=['input'], \
+        >>>    output_names=['output'])
+        >>> inputs = dict(input=torch.randn(1, 3, 224, 224))
+        >>> outputs = model(inputs)
+        >>> print(outputs)
+    """
+
+    def __init__(self,
+                 model: Union[str, torch.jit.RecursiveScriptModule],
+                 input_names: Optional[Sequence[str]] = None,
+                 output_names: Optional[Sequence[str]] = None):
+        # load custom ops if they exist
+        custom_ops_path = get_ops_path()
+        if osp.exists(custom_ops_path):
+            torch.ops.load_library(custom_ops_path)
+        super().__init__(output_names)
+        self.ts_model = model
+        if isinstance(self.ts_model, str):
+            self.ts_model = torch.jit.load(self.ts_model)
+
+        assert isinstance(self.ts_model, torch.jit.RecursiveScriptModule
+                          ), 'failed to load torchscript model.'
+
+        self._input_names = input_names
+        self._output_names = output_names
+
+    def forward(
+        self, inputs: Union[torch.Tensor, Sequence[torch.Tensor],
+                            Dict[str, torch.Tensor]]
+    ) -> Union[torch.Tensor, Sequence[torch.Tensor], Dict[str, torch.Tensor]]:
+        """Run forward inference.
+
+        Args:
+            inputs (torch.Tensor | Sequence[torch.Tensor] | Dict[str,
+                torch.Tensor]): The input tensor, or tensor sequence, or pairs
+                of input names and tensors.
+
+        Return:
+            outputs (torch.Tensor | Sequence[torch.Tensor] | Dict[str,
+                torch.Tensor]): The output tensor, or tensor sequence, or
+                pairs of output names and tensors.
+        """
+
+        is_dict_inputs = isinstance(inputs, Dict)
+        if is_dict_inputs:
+            # gather dict inputs into a list ordered by input_names
+            assert self._input_names is not None, \
+                'input names have not been given.'
+            inputs = [inputs[input_name] for input_name in self._input_names]
+        elif isinstance(inputs, torch.Tensor):
+            inputs = [inputs]
+
+        outputs = self.__torchscript_execute(inputs)
+
+        if self._output_names is not None and is_dict_inputs:
+            # gather outputs into a dict keyed by output_names
+            if isinstance(outputs, torch.Tensor):
+                outputs = [outputs]
+            outputs = dict(zip(self._output_names, outputs))
+
+        if isinstance(outputs, tuple) and self._output_names is not None:
+            assert len(outputs) == len(self._output_names)
+            outputs = dict(zip(self._output_names, outputs))
+        return outputs
+
+    @TimeCounter.count_time()
+    def __torchscript_execute(
+            self, inputs: Sequence[torch.Tensor]) -> Sequence[torch.Tensor]:
+        """Run inference with TorchScript.
+
+        Args:
+            inputs (Sequence[torch.Tensor]): Input tensors, in the order
+                expected by the traced model.
+        Returns:
+            torch.Tensor | Sequence[torch.Tensor]: The inference outputs from
+            TorchScript.
+        """
+        return self.ts_model(*inputs)
diff --git a/mmdeploy/codebase/base/backend_model.py b/mmdeploy/codebase/base/backend_model.py
index 6a21440693..93dc2fe74f 100644
--- a/mmdeploy/codebase/base/backend_model.py
+++ b/mmdeploy/codebase/base/backend_model.py
@@ -35,8 +35,11 @@ def __init__(self,
     def _build_wrapper(backend: Backend,
                        backend_files: Sequence[str],
                        device: str,
+                       input_names: Optional[Sequence[str]] = None,
                        output_names: Optional[Sequence[str]] = None,
-                       deploy_cfg: Optional[mmcv.Config] = None):
+                       deploy_cfg: Optional[mmcv.Config] = None,
+                       *args,
+                       **kwargs):
         """The default methods to build backend wrappers.

         Args:
@@ -44,6 +47,8 @@ def _build_wrapper(backend: Backend,
             backend_files (Sequence[str]): Paths to all required backend files
                 (e.g. '.onnx' for ONNX Runtime, '.param' and '.bin' for ncnn).
             device (str): A string specifying device type.
+            input_names (Sequence[str] | None): Names of model inputs in
+                order. Defaults to `None`.
             output_names (Sequence[str] | None): Names of model outputs in
                 order. Defaults to `None` and the wrapper will load the output
                 names from the model.
@@ -85,6 +90,12 @@ def _build_wrapper(backend: Backend,
                 model_file=backend_files[0],
                 task_name=task_name,
                 device=device)
+        elif backend == Backend.TORCHSCRIPT:
+            from mmdeploy.backend.torchscript import TorchscriptWrapper
+            return TorchscriptWrapper(
+                model=backend_files[0],
+                input_names=input_names,
+                output_names=output_names)
         else:
             raise NotImplementedError(f'Unknown backend type: {backend.value}')
diff --git a/mmdeploy/codebase/mmcls/deploy/classification_model.py b/mmdeploy/codebase/mmcls/deploy/classification_model.py
index f7f3bbfe73..263d097ecd 100644
--- a/mmdeploy/codebase/mmcls/deploy/classification_model.py
+++ b/mmdeploy/codebase/mmcls/deploy/classification_model.py
@@ -56,6 +56,7 @@ def _init_wrapper(self, backend: Backend, backend_files: Sequence[str],
             backend=backend,
             backend_files=backend_files,
             device=device,
+            input_names=[self.input_name],
             output_names=output_names,
             deploy_cfg=self.deploy_cfg)
diff --git a/mmdeploy/codebase/mmcls/models/backbones/shufflenet_v2.py b/mmdeploy/codebase/mmcls/models/backbones/shufflenet_v2.py
index 58e7030cab..2d26318af6 100644
--- a/mmdeploy/codebase/mmcls/models/backbones/shufflenet_v2.py
+++ b/mmdeploy/codebase/mmcls/models/backbones/shufflenet_v2.py
@@ -3,13 +3,17 @@
 from mmcls.models.utils import channel_shuffle

 from mmdeploy.core import FUNCTION_REWRITER
+from mmdeploy.utils import Backend

 # torch.chunk exports a dynamic-shape slice, which leads to integer inputs
 # on the ncnn backend, so the model needs to be rewritten.
@FUNCTION_REWRITER.register_rewriter( func_name='mmcls.models.backbones.shufflenet_v2.InvertedResidual.forward', - backend='ncnn') + backend=Backend.NCNN.value) +@FUNCTION_REWRITER.register_rewriter( + func_name='mmcls.models.backbones.shufflenet_v2.InvertedResidual.forward', + backend=Backend.TORCHSCRIPT.value) def shufflenetv2_backbone__forward__ncnn(ctx, self, x): """Rewrite `forward` of InvertedResidual used in shufflenet_v2 for ncnn backend. diff --git a/mmdeploy/codebase/mmdet/core/bbox/delta_xywh_bbox_coder.py b/mmdeploy/codebase/mmdet/core/bbox/delta_xywh_bbox_coder.py index 1b9b7904a5..56d71499cb 100644 --- a/mmdeploy/codebase/mmdet/core/bbox/delta_xywh_bbox_coder.py +++ b/mmdeploy/codebase/mmdet/core/bbox/delta_xywh_bbox_coder.py @@ -92,48 +92,42 @@ def delta2bbox(ctx, bboxes (Tensor): Boxes with shape (N, num_classes * 4) or (N, 4), where 4 represent tl_x, tl_y, br_x, br_y. """ - means = deltas.new_tensor(means).view(1, - -1).repeat(1, - deltas.size(-1) // 4) - stds = deltas.new_tensor(stds).view(1, -1).repeat(1, deltas.size(-1) // 4) - denorm_deltas = deltas * stds + means - dx = denorm_deltas[..., 0::4] - dy = denorm_deltas[..., 1::4] - dw = denorm_deltas[..., 2::4] - dh = denorm_deltas[..., 3::4] + means = deltas.new_tensor(means).view(1, -1) + stds = deltas.new_tensor(stds).view(1, -1) + delta_shape = deltas.shape + reshaped_deltas = deltas.view(delta_shape[:-1] + (-1, 4)) + denorm_deltas = reshaped_deltas * stds + means - x1, y1 = rois[..., 0], rois[..., 1] - x2, y2 = rois[..., 2], rois[..., 3] - # Compute center of each roi - px = ((x1 + x2) * 0.5).unsqueeze(-1).expand_as(dx) - py = ((y1 + y2) * 0.5).unsqueeze(-1).expand_as(dy) - # Compute width/height of each roi - pw = (x2 - x1).unsqueeze(-1).expand_as(dw) - ph = (y2 - y1).unsqueeze(-1).expand_as(dh) + dxy = denorm_deltas[..., :2] + dwh = denorm_deltas[..., 2:] - dx_width = pw * dx - dy_height = ph * dy + xy1 = rois[..., None, :2] + xy2 = rois[..., None, 2:] + + pxy = (xy1 + xy2) * 0.5 + pwh = xy2 - xy1 + dxy_wh = pwh * dxy max_ratio = np.abs(np.log(wh_ratio_clip)) if add_ctr_clamp: - dx_width = torch.clamp(dx_width, max=ctr_clamp, min=-ctr_clamp) - dy_height = torch.clamp(dy_height, max=ctr_clamp, min=-ctr_clamp) - dw = torch.clamp(dw, max=max_ratio) - dh = torch.clamp(dh, max=max_ratio) + dxy_wh = torch.clamp(dxy_wh, max=ctr_clamp, min=-ctr_clamp) + dwh = torch.clamp(dwh, max=max_ratio) else: - dw = dw.clamp(min=-max_ratio, max=max_ratio) - dh = dh.clamp(min=-max_ratio, max=max_ratio) + dwh = dwh.clamp(min=-max_ratio, max=max_ratio) + # Use exp(network energy) to enlarge/shrink each roi - gw = pw * dw.exp() - gh = ph * dh.exp() + half_gwh = pwh * dwh.exp() * 0.5 # Use network energy to shift the center of each roi - gx = px + dx_width - gy = py + dy_height + gxy = pxy + dxy_wh + # Convert center-xy/width/height to top-left, bottom-right - x1 = gx - gw * 0.5 - y1 = gy - gh * 0.5 - x2 = gx + gw * 0.5 - y2 = gy + gh * 0.5 + xy1 = gxy - half_gwh + xy2 = gxy + half_gwh + + x1 = xy1[..., 0] + y1 = xy1[..., 1] + x2 = xy2[..., 0] + y2 = xy2[..., 1] if clip_border and max_shape is not None: from mmdeploy.codebase.mmdet.deploy import clip_bboxes @@ -190,68 +184,42 @@ def delta2bbox__ncnn(ctx, or (N, num_classes * 4) or (N, 4), where 4 represent tl_x, tl_y, br_x, br_y. 
""" - means = deltas.new_tensor(means).view(1, 1, - -1).repeat(1, deltas.size(-2), - deltas.size(-1) // 4).data - stds = deltas.new_tensor(stds).view(1, 1, - -1).repeat(1, deltas.size(-2), - deltas.size(-1) // 4).data - denorm_deltas = deltas * stds + means - if denorm_deltas.shape[-1] == 4: - dx = denorm_deltas[..., 0:1] - dy = denorm_deltas[..., 1:2] - dw = denorm_deltas[..., 2:3] - dh = denorm_deltas[..., 3:4] - else: - dx = denorm_deltas[..., 0::4] - dy = denorm_deltas[..., 1::4] - dw = denorm_deltas[..., 2::4] - dh = denorm_deltas[..., 3::4] + means = deltas.new_tensor(means).view(1, 1, 1, -1).data + stds = deltas.new_tensor(stds).view(1, 1, 1, -1).data + delta_shape = deltas.shape + reshaped_deltas = deltas.view(delta_shape[:-1] + (-1, 4)) + denorm_deltas = reshaped_deltas * stds + means - x1, y1 = rois[..., 0:1], rois[..., 1:2] - x2, y2 = rois[..., 2:3], rois[..., 3:4] + dxy = denorm_deltas[..., :2] + dwh = denorm_deltas[..., 2:] - # Compute center of each roi - px = (x1 + x2) * 0.5 - py = (y1 + y2) * 0.5 - # Compute width/height of each roi - pw = x2 - x1 - ph = y2 - y1 + xy1 = rois[..., None, :2] + xy2 = rois[..., None, 2:] - # do not use expand unless necessary - # since expand is a custom ops - if px.shape[-1] != 4: - px = px.expand_as(dx) - if py.shape[-1] != 4: - py = py.expand_as(dy) - if pw.shape[-1] != 4: - pw = pw.expand_as(dw) - if px.shape[-1] != 4: - ph = ph.expand_as(dh) - - dx_width = pw * dx - dy_height = ph * dy + pxy = (xy1 + xy2) * 0.5 + pwh = xy2 - xy1 + dxy_wh = pwh * dxy max_ratio = np.abs(np.log(wh_ratio_clip)) if add_ctr_clamp: - dx_width = torch.clamp(dx_width, max=ctr_clamp, min=-ctr_clamp) - dy_height = torch.clamp(dy_height, max=ctr_clamp, min=-ctr_clamp) - dw = torch.clamp(dw, max=max_ratio) - dh = torch.clamp(dh, max=max_ratio) + dxy_wh = torch.clamp(dxy_wh, max=ctr_clamp, min=-ctr_clamp) + dwh = torch.clamp(dwh, max=max_ratio) else: - dw = dw.clamp(min=-max_ratio, max=max_ratio) - dh = dh.clamp(min=-max_ratio, max=max_ratio) + dwh = dwh.clamp(min=-max_ratio, max=max_ratio) + # Use exp(network energy) to enlarge/shrink each roi - gw = pw * dw.exp() - gh = ph * dh.exp() + half_gwh = pwh * dwh.exp() * 0.5 # Use network energy to shift the center of each roi - gx = px + dx_width - gy = py + dy_height + gxy = pxy + dxy_wh + # Convert center-xy/width/height to top-left, bottom-right - x1 = gx - gw * 0.5 - y1 = gy - gh * 0.5 - x2 = gx + gw * 0.5 - y2 = gy + gh * 0.5 + xy1 = gxy - half_gwh + xy2 = gxy + half_gwh + + x1 = xy1[..., 0] + y1 = xy1[..., 1] + x2 = xy2[..., 0] + y2 = xy2[..., 1] if clip_border and max_shape is not None: from mmdeploy.codebase.mmdet.deploy import clip_bboxes diff --git a/mmdeploy/codebase/mmdet/core/ops/detection_output.py b/mmdeploy/codebase/mmdet/core/ops/detection_output.py index 48d9f84415..67809660de 100644 --- a/mmdeploy/codebase/mmdet/core/ops/detection_output.py +++ b/mmdeploy/codebase/mmdet/core/ops/detection_output.py @@ -1,3 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. import torch diff --git a/mmdeploy/codebase/mmdet/core/ops/prior_box.py b/mmdeploy/codebase/mmdet/core/ops/prior_box.py index 24efb02dee..28d76f95bf 100644 --- a/mmdeploy/codebase/mmdet/core/ops/prior_box.py +++ b/mmdeploy/codebase/mmdet/core/ops/prior_box.py @@ -1,3 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
import torch diff --git a/mmdeploy/codebase/mmdet/core/post_processing/bbox_nms.py b/mmdeploy/codebase/mmdet/core/post_processing/bbox_nms.py index 2580d37ef8..ee7a1403d7 100644 --- a/mmdeploy/codebase/mmdet/core/post_processing/bbox_nms.py +++ b/mmdeploy/codebase/mmdet/core/post_processing/bbox_nms.py @@ -5,7 +5,7 @@ import mmdeploy from mmdeploy.core import FUNCTION_REWRITER, mark from mmdeploy.mmcv.ops import ONNXNMSop, TRTBatchedNMSop -from mmdeploy.utils import is_dynamic_batch +from mmdeploy.utils import Backend, is_dynamic_batch def select_nms_index(scores: torch.Tensor, @@ -269,3 +269,60 @@ def multiclass_nms(*args, **kwargs): """Wrapper function for `_multiclass_nms`.""" return mmdeploy.codebase.mmdet.core.post_processing._multiclass_nms( *args, **kwargs) + + +@FUNCTION_REWRITER.register_rewriter( + func_name='mmdeploy.codebase.mmdet.core.post_processing._multiclass_nms', + backend=Backend.TORCHSCRIPT.value) +def multiclass_nms__torchscript(ctx, + boxes: Tensor, + scores: Tensor, + max_output_boxes_per_class: int = 1000, + iou_threshold: float = 0.5, + score_threshold: float = 0.05, + pre_top_k: int = -1, + keep_top_k: int = -1): + """rewrite for torchscript batched nms. + + Use batched_nms from torchvision instead of custom nms. + """ + # TODO: simplify inference for non-batch model + from torchvision.ops import batched_nms + batch_size = scores.shape[0] + num_boxes = scores.shape[1] + num_classes = scores.shape[2] + box_per_cls = len(boxes.shape) == 4 + scores = torch.where(scores > score_threshold, scores, scores.new_zeros(1)) + + # pre-topk + if pre_top_k > 0: + max_scores, _ = scores.max(-1) + _, topk_inds = max_scores.topk(pre_top_k) + batch_inds = torch.arange(batch_size).view( + -1, 1).expand_as(topk_inds).long() + boxes = boxes[batch_inds, topk_inds, ...] 
+        scores = scores[batch_inds, topk_inds, :] + num_boxes = scores.shape[1] + + idxs = torch.arange(0, batch_size, device=scores.device).unsqueeze(1) + idxs = idxs.repeat(1, num_boxes).view(-1) + + keeps = [None] * num_classes + for cls_id in range(num_classes): + box = boxes if not box_per_cls else boxes[:, :, cls_id, :] + score = scores[:, :, cls_id] + box = box.view(-1, 4) + score = score.view(-1) + box_keep = batched_nms(box, score, idxs, iou_threshold=iou_threshold) + box_keep = box_keep[:max_output_boxes_per_class * batch_size] + batch_keep = idxs[box_keep] + cls_keep = torch.ones_like(box_keep) * cls_id + box_keep = box_keep - batch_keep * num_boxes + keeps[cls_id] = torch.stack([batch_keep, cls_keep, box_keep], dim=1) + + keeps = torch.cat(keeps) + scores = scores.permute(0, 2, 1) + dets, labels = select_nms_index( + scores, boxes, keeps, batch_size, keep_top_k=keep_top_k) + + return dets, labels diff --git a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py index 79cdae9ecc..b368d10972 100644 --- a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py +++ b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py @@ -78,6 +78,7 @@ def _init_wrapper(self, backend: Backend, backend_files: Sequence[str], backend=backend, backend_files=backend_files, device=device, + input_names=[self.input_name], output_names=output_names, deploy_cfg=self.deploy_cfg) @@ -424,13 +425,14 @@ def _init_wrapper(self, backend, backend_files, device): backend, backend_files[0:n], device, - partition0_output_names, + output_names=partition0_output_names, deploy_cfg=self.deploy_cfg) self.second_wrapper = BaseBackendModel._build_wrapper( backend, backend_files[n:2 * n], - device, ['cls_score', 'bbox_pred'], + device, + output_names=['cls_score', 'bbox_pred'], deploy_cfg=self.deploy_cfg) def partition0_postprocess(self, x: Sequence[torch.Tensor], diff --git a/mmdeploy/codebase/mmdet/deploy/utils.py b/mmdeploy/codebase/mmdet/deploy/utils.py index 1ecd451e2f..860cb54239 100644 --- a/mmdeploy/codebase/mmdet/deploy/utils.py +++ b/mmdeploy/codebase/mmdet/deploy/utils.py @@ -5,6 +5,8 @@ import torch from torch import Tensor +from mmdeploy.core import FUNCTION_REWRITER +from mmdeploy.core.rewriters.rewriter_utils import LibVersionChecker from mmdeploy.utils import load_config @@ -69,6 +71,33 @@ def clip_bboxes(x1: Tensor, y1: Tensor, x2: Tensor, y2: Tensor, return x1, y1, x2, y2 +@FUNCTION_REWRITER.register_rewriter( + func_name='mmdeploy.codebase.mmdet.deploy.utils.clip_bboxes', + backend='tensorrt', + extra_checkers=LibVersionChecker('tensorrt', min_version='8')) +def clip_bboxes__trt8(ctx, x1: Tensor, y1: Tensor, x2: Tensor, y2: Tensor, + max_shape: Union[Tensor, Sequence[int]]): + """Clip bboxes for the TensorRT backend. Since TensorRT 8, the clamp + operators can be applied to the tensors directly. + + Args: + ctx (ContextCaller): The context with additional information. + x1 (Tensor): The x1 for bounding boxes. + y1 (Tensor): The y1 for bounding boxes. + x2 (Tensor): The x2 for bounding boxes. + y2 (Tensor): The y2 for bounding boxes. + max_shape (Tensor | Sequence[int]): The (H,W) of original image. + Returns: + tuple(Tensor): The clipped x1, y1, x2, y2.
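+ + Examples: + >>> # A minimal sketch of the per-coordinate clamp this rewrite + >>> # performs; the tensor values below are made up. + >>> import torch + >>> torch.clamp(torch.tensor([-10., 500., 2000.]), 0, 1344) + tensor([ 0., 500., 1344.])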
+ """ + assert len(max_shape) == 2, '`max_shape` should be [h, w]' + x1 = torch.clamp(x1, 0, max_shape[1]) + y1 = torch.clamp(y1, 0, max_shape[0]) + x2 = torch.clamp(x2, 0, max_shape[1]) + y2 = torch.clamp(y2, 0, max_shape[0]) + return x1, y1, x2, y2 + + def pad_with_value(x: Tensor, pad_dim: int, pad_size: int, diff --git a/mmdeploy/codebase/mmdet/models/backbones.py b/mmdeploy/codebase/mmdet/models/backbones.py index 2012df0241..6520702adb 100644 --- a/mmdeploy/codebase/mmdet/models/backbones.py +++ b/mmdeploy/codebase/mmdet/models/backbones.py @@ -1,3 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. import torch from mmdeploy.core import FUNCTION_REWRITER diff --git a/mmdeploy/codebase/mmdet/models/dense_heads/__init__.py b/mmdeploy/codebase/mmdet/models/dense_heads/__init__.py index 080fe6c7ba..9043d44264 100644 --- a/mmdeploy/codebase/mmdet/models/dense_heads/__init__.py +++ b/mmdeploy/codebase/mmdet/models/dense_heads/__init__.py @@ -2,6 +2,7 @@ from .base_dense_head import (base_dense_head__get_bbox, base_dense_head__get_bboxes__ncnn) from .fovea_head import fovea_head__get_bboxes +from .gfl_head import gfl_head__get_bbox from .rpn_head import rpn_head__get_bboxes, rpn_head__get_bboxes__ncnn from .ssd_head import ssd_head__get_bboxes__ncnn from .yolo_head import yolov3_head__get_bboxes, yolov3_head__get_bboxes__ncnn @@ -12,5 +13,6 @@ 'yolov3_head__get_bboxes', 'yolov3_head__get_bboxes__ncnn', 'yolox_head__get_bboxes', 'base_dense_head__get_bbox', 'fovea_head__get_bboxes', 'base_dense_head__get_bboxes__ncnn', - 'ssd_head__get_bboxes__ncnn', 'yolox_head__get_bboxes__ncnn' + 'ssd_head__get_bboxes__ncnn', 'yolox_head__get_bboxes__ncnn', + 'gfl_head__get_bbox' ] diff --git a/mmdeploy/codebase/mmdet/models/dense_heads/base_dense_head.py b/mmdeploy/codebase/mmdet/models/dense_heads/base_dense_head.py index d4413182a9..3c94c16250 100644 --- a/mmdeploy/codebase/mmdet/models/dense_heads/base_dense_head.py +++ b/mmdeploy/codebase/mmdet/models/dense_heads/base_dense_head.py @@ -1,3 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. import torch from mmdet.core.bbox.coder import (DeltaXYWHBBoxCoder, DistancePointBBoxCoder, TBLRBBoxCoder) diff --git a/mmdeploy/codebase/mmdet/models/dense_heads/gfl_head.py b/mmdeploy/codebase/mmdet/models/dense_heads/gfl_head.py new file mode 100644 index 0000000000..8dba8b5666 --- /dev/null +++ b/mmdeploy/codebase/mmdet/models/dense_heads/gfl_head.py @@ -0,0 +1,185 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn.functional as F + +from mmdeploy.codebase.mmdet import (get_post_processing_params, + multiclass_nms, pad_with_value) +from mmdeploy.core import FUNCTION_REWRITER +from mmdeploy.utils import Backend, get_backend, is_dynamic_shape + + +@FUNCTION_REWRITER.register_rewriter( + func_name='mmdet.models.dense_heads.gfl_head.' + 'GFLHead.get_bboxes') +def gfl_head__get_bbox(ctx, + self, + cls_scores, + bbox_preds, + score_factors=None, + img_metas=None, + cfg=None, + rescale=False, + with_nms=True, + **kwargs): + """Rewrite `get_bboxes` of `GFLHead` for default backend. + + Rewrite this function to deploy model, transform network output for a + batch into bbox predictions. + + Args: + ctx (ContextCaller): The context with additional information. + self: The instance of the original class. + cls_scores (list[Tensor]): Classification scores for all + scale levels, each is a 4D-tensor, has shape + (batch_size, num_priors * num_classes, H, W). 
+        bbox_preds (list[Tensor]): Box energies / deltas for all + scale levels, each is a 4D-tensor, has shape + (batch_size, num_priors * 4, H, W). + score_factors (list[Tensor], Optional): Score factor for + all scale levels, each is a 4D-tensor, has shape + (batch_size, num_priors * 1, H, W). Default None. + img_metas (list[dict], Optional): Image meta info. Default None. + cfg (mmcv.Config, Optional): Test / postprocessing configuration, + if None, test_cfg would be used. Default None. + rescale (bool): If True, return boxes in original image space. + Default False. + with_nms (bool): If True, do NMS before returning boxes. + Default True. + + Returns: + If with_nms == True: + tuple[Tensor, Tensor]: (dets, labels), + `dets` of shape [N, num_det, 5] and `labels` of shape + [N, num_det]. + Else: + tuple[Tensor, Tensor, Tensor]: batch_mlvl_bboxes, + batch_mlvl_scores, batch_mlvl_centerness + """ + deploy_cfg = ctx.cfg + is_dynamic_flag = is_dynamic_shape(deploy_cfg) + backend = get_backend(deploy_cfg) + num_levels = len(cls_scores) + + featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] + mlvl_priors = self.prior_generator.grid_priors( + featmap_sizes, dtype=bbox_preds[0].dtype, device=bbox_preds[0].device) + + mlvl_cls_scores = [cls_scores[i].detach() for i in range(num_levels)] + mlvl_bbox_preds = [bbox_preds[i].detach() for i in range(num_levels)] + if score_factors is None: + with_score_factors = False + mlvl_score_factor = [None for _ in range(num_levels)] + else: + with_score_factors = True + mlvl_score_factor = [ + score_factors[i].detach() for i in range(num_levels) + ] + mlvl_score_factors = [] + assert img_metas is not None + img_shape = img_metas[0]['img_shape'] + + assert len(cls_scores) == len(bbox_preds) == len(mlvl_priors) + batch_size = cls_scores[0].shape[0] + cfg = self.test_cfg + pre_topk = cfg.get('nms_pre', -1) + + mlvl_valid_bboxes = [] + mlvl_valid_scores = [] + mlvl_valid_priors = [] + + for cls_score, bbox_pred, score_factors, priors, stride in zip( + mlvl_cls_scores, mlvl_bbox_preds, mlvl_score_factor, mlvl_priors, + self.prior_generator.strides): + assert cls_score.size()[-2:] == bbox_pred.size()[-2:] + assert stride[0] == stride[1] + + scores = cls_score.permute(0, 2, 3, 1).reshape(batch_size, -1, + self.cls_out_channels) + if self.use_sigmoid_cls: + scores = scores.sigmoid() + nms_pre_score = scores + else: + scores = scores.softmax(-1) + nms_pre_score = scores + if with_score_factors: + score_factors = score_factors.permute(0, 2, 3, + 1).reshape(batch_size, + -1).sigmoid() + score_factors = score_factors.unsqueeze(2) + bbox_pred = batched_integral(self.integral, + bbox_pred.permute(0, 2, 3, 1)) * stride[0] + if not is_dynamic_flag: + priors = priors.data + priors = priors.expand(batch_size, -1, priors.size(-1)) + if pre_topk > 0: + if with_score_factors: + nms_pre_score = nms_pre_score * score_factors + if backend == Backend.TENSORRT: + priors = pad_with_value(priors, 1, pre_topk) + bbox_pred = pad_with_value(bbox_pred, 1, pre_topk) + scores = pad_with_value(scores, 1, pre_topk, 0.) + nms_pre_score = pad_with_value(nms_pre_score, 1, pre_topk, 0.) + if with_score_factors: + score_factors = pad_with_value(score_factors, 1, pre_topk, + 0.) + + # Get maximum scores for foreground classes.
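+            # for softmax heads the last channel is the background class, which is dropped below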
+            if self.use_sigmoid_cls: + max_scores, _ = nms_pre_score.max(-1) + else: + max_scores, _ = nms_pre_score[..., :-1].max(-1) + _, topk_inds = max_scores.topk(pre_topk) + batch_inds = torch.arange( + batch_size, + device=bbox_pred.device).view(-1, 1).expand_as(topk_inds) + priors = priors[batch_inds, topk_inds, :] + bbox_pred = bbox_pred[batch_inds, topk_inds, :] + scores = scores[batch_inds, topk_inds, :] + if with_score_factors: + score_factors = score_factors[batch_inds, topk_inds, :] + + mlvl_valid_bboxes.append(bbox_pred) + mlvl_valid_scores.append(scores) + priors = self.anchor_center(priors) + mlvl_valid_priors.append(priors) + if with_score_factors: + mlvl_score_factors.append(score_factors) + + batch_mlvl_bboxes_pred = torch.cat(mlvl_valid_bboxes, dim=1) + batch_scores = torch.cat(mlvl_valid_scores, dim=1) + batch_priors = torch.cat(mlvl_valid_priors, dim=1) + batch_bboxes = self.bbox_coder.decode( + batch_priors, batch_mlvl_bboxes_pred, max_shape=img_shape) + if with_score_factors: + batch_score_factors = torch.cat(mlvl_score_factors, dim=1) + + if not self.use_sigmoid_cls: + batch_scores = batch_scores[..., :self.num_classes] + + if with_score_factors: + batch_scores = batch_scores * batch_score_factors + if not with_nms: + return batch_bboxes, batch_scores + post_params = get_post_processing_params(deploy_cfg) + max_output_boxes_per_class = post_params.max_output_boxes_per_class + iou_threshold = cfg.nms.get('iou_threshold', post_params.iou_threshold) + score_threshold = cfg.get('score_thr', post_params.score_threshold) + pre_top_k = post_params.pre_top_k + keep_top_k = cfg.get('max_per_img', post_params.keep_top_k) + return multiclass_nms( + batch_bboxes, + batch_scores, + max_output_boxes_per_class, + iou_threshold=iou_threshold, + score_threshold=score_threshold, + pre_top_k=pre_top_k, + keep_top_k=keep_top_k) + + +def batched_integral(integral, x): + batch_size = x.size(0) + x = F.softmax(x.reshape(batch_size, -1, integral.reg_max + 1), dim=2) + x = F.linear(x, + integral.project.type_as(x).unsqueeze(0)).reshape( + batch_size, -1, 4) + return x diff --git a/mmdeploy/codebase/mmdet/models/necks.py b/mmdeploy/codebase/mmdet/models/necks.py index f430124381..2931de9b0a 100644 --- a/mmdeploy/codebase/mmdet/models/necks.py +++ b/mmdeploy/codebase/mmdet/models/necks.py @@ -1,3 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. import torch from mmdeploy.core import FUNCTION_REWRITER diff --git a/mmdeploy/codebase/mmdet/models/roi_heads/single_level_roi_extractor.py b/mmdeploy/codebase/mmdet/models/roi_heads/single_level_roi_extractor.py index c997a5ab6a..de0ea22dad 100644 --- a/mmdeploy/codebase/mmdet/models/roi_heads/single_level_roi_extractor.py +++ b/mmdeploy/codebase/mmdet/models/roi_heads/single_level_roi_extractor.py @@ -4,6 +4,8 @@ from mmdeploy.core.optimizers import mark from mmdeploy.core.rewriters import FUNCTION_REWRITER +from mmdeploy.utils import get_backend +from mmdeploy.utils.constants import Backend class MultiLevelRoiAlign(Function): @@ -98,14 +100,18 @@ def single_roi_extractor__forward(ctx, roi_scale_factor=None): """Rewrite `forward` of SingleRoIExtractor for default backend. - Rewrite this function to enable exporting to onnx even though the input + Rewrite this function to: + 1. enable exporting to IR even though the input image contains no targets. Note that, `ScatterND` of onnx may conflict with `Reshape` if a tensor have a dim size of 0. Thus, we have to cat zeros to the dim 0 of `roi_feats` and recover back after all roi align finished.
- Besides, this function adds mark for roi_extractor forward and remove - unnecessary code of origin forward function. + 2. add marks for the roi_extractor forward and remove + unnecessary code of the original forward function when using ONNX as the IR. + + 3. use the roi align in torchvision to accelerate the inference. """ + backend = get_backend(ctx.cfg) out_size = self.roi_layers[0].output_size num_levels = len(feats) roi_feats = feats[0].new_zeros(rois.shape[0], self.out_channels, *out_size) @@ -118,29 +124,30 @@ def single_roi_extractor__forward(ctx, if roi_scale_factor is not None: rois = self.roi_rescale(rois, roi_scale_factor) - # concat len num_levels * 2 of zero tensors to dim 0 of roi_feats + # concatenate zeros to rois and roi_feats for empty tensor cases roi_feats = torch.cat( (roi_feats.new_zeros(num_levels * 2, *roi_feats.shape[-3:]), roi_feats)) + rois = torch.cat((rois.new_zeros(num_levels * 2, 5), rois)) + _tmp = torch.linspace( + 0, + num_levels - 1, + num_levels, + dtype=target_lvls.dtype, + device=target_lvls.device) + target_lvls = torch.cat((_tmp, _tmp, target_lvls)) for i in range(num_levels): + # use the roi align in torchvision to accelerate the inference + # roi_align in MMCV is the same as torchvision's when pool mode is 'avg' + if backend == Backend.TORCHSCRIPT or self.roi_layers[ + i].pool_mode == 'avg': + self.roi_layers[i].use_torchvision = True mask = target_lvls == i inds = mask.nonzero(as_tuple=False).squeeze(1) - - # concat len 2 zero tensors to dim 0 of roi_feats - rois_i = torch.cat((rois.new_zeros(2, 5), rois[inds])) - - roi_feats_t = self.roi_layers[i](feats[i], rois_i) - - # correspondingly change the inds - inds = torch.cat([ - torch.tensor([2 * i, 2 * i + 1], - device=inds.device, - dtype=inds.dtype), inds + num_levels * 2 - ]) + roi_feats_t = self.roi_layers[i](feats[i], rois[inds]) roi_feats[inds] = roi_feats_t - - # slice and recover tensors - roi_feats = roi_feats[num_levels * (2):] + # slice to recover original size + roi_feats = roi_feats[num_levels * 2:] return roi_feats diff --git a/mmdeploy/codebase/mmedit/deploy/super_resolution_model.py b/mmdeploy/codebase/mmedit/deploy/super_resolution_model.py index 903899b6ee..ade5d0beea 100644 --- a/mmdeploy/codebase/mmedit/deploy/super_resolution_model.py +++ b/mmdeploy/codebase/mmedit/deploy/super_resolution_model.py @@ -54,6 +54,7 @@ def _init_wrapper(self, backend: Backend, backend_files: Sequence[str], backend=backend, backend_files=backend_files, device=device, + input_names=[self.input_name], output_names=output_names, deploy_cfg=self.deploy_cfg) @@ -204,11 +205,7 @@ def forward(self, output = self.wrapper.invoke([img])[0] if test_mode: output = torch.from_numpy(output) - output = torch.permute(output, ( - 2, - 0, - 1, - )) + output = output.permute(2, 0, 1) output = output / 255.
results = self.test_post_process([output], lq, gt) return results diff --git a/mmdeploy/codebase/mmocr/deploy/text_detection_model.py b/mmdeploy/codebase/mmocr/deploy/text_detection_model.py index 31861b66e4..d6917161d9 100644 --- a/mmdeploy/codebase/mmocr/deploy/text_detection_model.py +++ b/mmdeploy/codebase/mmocr/deploy/text_detection_model.py @@ -67,6 +67,7 @@ def _init_wrapper(self, backend: Backend, backend_files: Sequence[str], backend=backend, backend_files=backend_files, device=device, + input_names=[self.input_name], output_names=output_names, deploy_cfg=self.deploy_cfg) diff --git a/mmdeploy/codebase/mmocr/deploy/text_recognition_model.py b/mmdeploy/codebase/mmocr/deploy/text_recognition_model.py index 7f07dbba63..de9d18154f 100644 --- a/mmdeploy/codebase/mmocr/deploy/text_recognition_model.py +++ b/mmdeploy/codebase/mmocr/deploy/text_recognition_model.py @@ -72,6 +72,7 @@ def _init_wrapper(self, backend: Backend, backend_files: Sequence[str], backend=backend, backend_files=backend_files, device=device, + input_names=[self.input_name], output_names=output_names, deploy_cfg=self.deploy_cfg) diff --git a/mmdeploy/codebase/mmseg/deploy/segmentation_model.py b/mmdeploy/codebase/mmseg/deploy/segmentation_model.py index 46e0789031..a57cb9a70b 100644 --- a/mmdeploy/codebase/mmseg/deploy/segmentation_model.py +++ b/mmdeploy/codebase/mmseg/deploy/segmentation_model.py @@ -59,6 +59,7 @@ def _init_wrapper(self, backend, backend_files, device): backend=backend, backend_files=backend_files, device=device, + input_names=[self.input_name], output_names=output_names, deploy_cfg=self.deploy_cfg) diff --git a/mmdeploy/core/optimizers/function_marker.py b/mmdeploy/core/optimizers/function_marker.py index 98a46f6e73..5ad0501593 100644 --- a/mmdeploy/core/optimizers/function_marker.py +++ b/mmdeploy/core/optimizers/function_marker.py @@ -5,7 +5,7 @@ import torch from mmdeploy.core.rewriters import FUNCTION_REWRITER -from mmdeploy.utils import cfg_apply_marks, get_partition_config +from mmdeploy.utils import IR, cfg_apply_marks, get_partition_config MARK_FUNCTION_COUNT = dict() @@ -180,6 +180,20 @@ def impl(ys, prefix, level): return impl(xs, (), level) +@FUNCTION_REWRITER.register_rewriter( + 'mmdeploy.core.optimizers.function_marker.mark_tensors', ir=IR.TORCHSCRIPT) +def remove_mark__torchscript(ctx, xs: Any, *args, **kwargs): + """Disable all marks for TorchScript backend. + + As the Node `mark` is not able to be traced, we just return original input + for the function `mark_tensors`. + + Args: + xs (Any): Input structure which contains tensor. + """ + return xs + + def mark(func_name: Optional[str] = None, inputs: Optional[Sequence[str]] = None, outputs: Optional[Sequence[str]] = None, diff --git a/mmdeploy/core/rewriters/function_rewriter.py b/mmdeploy/core/rewriters/function_rewriter.py index 674361f634..e80ed41d06 100644 --- a/mmdeploy/core/rewriters/function_rewriter.py +++ b/mmdeploy/core/rewriters/function_rewriter.py @@ -1,8 +1,9 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from typing import Callable, Dict +from typing import Callable, Dict, List, Optional, Union -from mmdeploy.utils import Backend, get_root_logger -from .rewriter_utils import ContextCaller, RewriterRegistry, import_function +from mmdeploy.utils import IR, Backend, get_root_logger +from .rewriter_utils import (Checker, ContextCaller, RewriterRegistry, + import_function) def _set_func(origin_func_path: str, rewrite_func: Callable): @@ -66,32 +67,33 @@ class FunctionRewriter: def __init__(self): self._registry = RewriterRegistry() - def add_backend(self, backend: str): - """Add a backend by calling the _registry.add_backend.""" - self._registry.add_backend(backend) - - def register_rewriter(self, - func_name: str, - backend: str = Backend.DEFAULT.value, - **kwargs): + def register_rewriter( + self, + func_name: str, + backend: str = Backend.DEFAULT.value, + ir: IR = IR.DEFAULT, + extra_checkers: Optional[Union[Checker, List[Checker]]] = None, + **kwargs): """The interface of function rewriter decorator. Args: func_name (str): The function name/path to rewrite. - backend (str): The inference engine name. + backend (str): The backend on which the rewriter will be activated. + ir (IR): The IR on which the rewriter will be activated. + extra_checkers (Checker | List[Checker] | None): Other requirements + defined by Checker. + Returns: Callable: The process of registering function. """ - return self._registry.register_object(func_name, backend, **kwargs) + return self._registry.register_object(func_name, backend, ir, + extra_checkers, **kwargs) - def enter(self, - cfg: Dict = dict(), - backend: str = Backend.DEFAULT.value, - **kwargs): + def enter(self, cfg: Dict = dict(), env: Dict = dict(), **kwargs): """The implementation of function rewrite.""" # Get current records - functions_records = self._registry.get_records(backend) + functions_records = self._registry.get_records(env) self._origin_functions = list() self._additional_functions = list() diff --git a/mmdeploy/core/rewriters/module_rewriter.py b/mmdeploy/core/rewriters/module_rewriter.py index 43720443c6..d0961809a0 100644 --- a/mmdeploy/core/rewriters/module_rewriter.py +++ b/mmdeploy/core/rewriters/module_rewriter.py @@ -1,11 +1,13 @@ # Copyright (c) OpenMMLab. All rights reserved. import inspect +from typing import Dict, List, Optional, Union import mmcv from torch import nn -from mmdeploy.utils.constants import Backend -from .rewriter_utils import RewriterRegistry, eval_with_import +from mmdeploy.utils.constants import IR, Backend +from .rewriter_utils import (Checker, RewriterRegistry, collect_env, + eval_with_import) class ModuleRewriter: @@ -26,29 +28,33 @@ class ModuleRewriter: def __init__(self): self._registry = RewriterRegistry() - def add_backend(self, backend: str): - """Add a backend by calling the _registry.add_backend.""" - self._registry.add_backend(backend) - - def register_rewrite_module(self, - module_type: str, - backend: str = Backend.DEFAULT.value, - **kwargs): + def register_rewrite_module( + self, + module_type: str, + backend: str = Backend.DEFAULT.value, + ir: IR = IR.DEFAULT, + extra_checkers: Optional[Union[Checker, List[Checker]]] = None, + **kwargs): """The interface of module rewriter decorator. Args: module_type (str): The module type name to rewrite. - backend (str): The inference engine name. + backend (str): The backend on which the rewriter will be activated. + ir (IR): The IR on which the rewriter will be activated. + extra_checkers (Checker | List[Checker] | None): Other requirements + defined by Checker.
Returns: - nn.Module: THe rewritten model. + nn.Module: The rewritten model. """ - return self._registry.register_object(module_type, backend, **kwargs) + return self._registry.register_object(module_type, backend, ir, + extra_checkers, **kwargs) def patch_model(self, model: nn.Module, cfg: mmcv.Config, backend: str = Backend.DEFAULT.value, + ir: IR = IR.DEFAULT, recursive: bool = True, **kwargs) -> nn.Module: """Replace the models that was registered. @@ -57,6 +63,7 @@ model (torch.nn.Module): The model to patch. cfg (Dict): Config dictionary of deployment. backend (str): The inference engine name. + ir (IR): The intermediate representation name. recursive (bool): The flag to enable recursive patching. Returns: @@ -67,7 +74,9 @@ >>> patched_model = patch_model(model, cfg=deploy_cfg, >>> backend=backend) """ - self._collect_record(backend) + # TODO: Make the type of parameter backend to Backend + env = collect_env(Backend.get(backend), ir) + self._collect_record(env) return self._replace_module(model, cfg, recursive, **kwargs) def _replace_one_module(self, module, cfg, **kwargs): @@ -103,9 +112,9 @@ def _replace_module_impl(model, cfg, **kwargs): return _replace_module_impl(model, cfg, **kwargs) - def _collect_record(self, backend: str): + def _collect_record(self, env: Dict): """Collect models in registry.""" self._records = {} - records = self._registry.get_records(backend) + records = self._registry.get_records(env) for name, kwargs in records: self._records[eval_with_import(name)] = kwargs diff --git a/mmdeploy/core/rewriters/rewriter_manager.py b/mmdeploy/core/rewriters/rewriter_manager.py index df7e82703d..de3acaffd2 100644 --- a/mmdeploy/core/rewriters/rewriter_manager.py +++ b/mmdeploy/core/rewriters/rewriter_manager.py @@ -4,9 +4,10 @@ import mmcv import torch.nn as nn -from mmdeploy.utils.constants import Backend +from mmdeploy.utils.constants import IR, Backend from .function_rewriter import FunctionRewriter from .module_rewriter import ModuleRewriter +from .rewriter_utils import collect_env from .symbolic_rewriter import SymbolicRewriter @@ -18,20 +19,8 @@ def __init__(self): self.function_rewriter = FunctionRewriter() self.symbolic_rewriter = SymbolicRewriter() - def add_backend(self, backend: str): - """Add backend to all rewriters. - - Args: - backend (str): The backend to support. - """ - self.module_rewriter.add_backend(backend) - self.function_rewriter.add_backend(backend) - self.symbolic_rewriter.add_backend(backend) - REWRITER_MANAGER = RewriterManager() -for backend in Backend: - REWRITER_MANAGER.add_backend(backend.value) MODULE_REWRITER = REWRITER_MANAGER.module_rewriter FUNCTION_REWRITER = REWRITER_MANAGER.function_rewriter @@ -41,6 +30,7 @@ def add_backend(self, backend: str): def patch_model(model: nn.Module, cfg: mmcv.Config, backend: str = Backend.DEFAULT.value, + ir: IR = IR.DEFAULT, recursive: bool = True, **kwargs) -> nn.Module: """Patch the model, replace the modules that can be rewritten. Note that @@ -50,6 +40,7 @@ model (torch.nn.Module): The model to patch. cfg (Dict): Config dictionary of deployment. backend (str): The inference engine name. + ir (IR): The intermediate representation name. recursive (bool): The flag to enable recursive patching.
Returns: @@ -59,7 +50,7 @@ >>> from mmdeploy.core import patch_model >>> patched_model = patch_model(model, cfg=deploy_cfg, backend=backend) """ - return MODULE_REWRITER.patch_model(model, cfg, backend, recursive, + return MODULE_REWRITER.patch_model(model, cfg, backend, ir, recursive, **kwargs) @@ -71,6 +62,7 @@ class RewriterContext: Args: cfg (Dict): Config dictionary of deployment. backend (str): The inference engine name. + ir (IR): The intermediate representation name. rewrite_manager (RewriterManager): An RewriteManager that consists of several rewriters @@ -84,20 +76,19 @@ class RewriterContext: def __init__(self, cfg: Dict = dict(), backend: str = Backend.DEFAULT.value, + ir: IR = IR.DEFAULT, rewriter_manager: RewriterManager = REWRITER_MANAGER, **kwargs): self._cfg = cfg - self._backend = backend self._kwargs = kwargs self._rewriter_manager = rewriter_manager + self._env = collect_env(Backend.get(backend), ir) def enter(self): """Call the enter() of rewriters.""" - self._rewriter_manager.function_rewriter.enter(self._cfg, - self._backend, + self._rewriter_manager.function_rewriter.enter(self._cfg, self._env, **self._kwargs) - self._rewriter_manager.symbolic_rewriter.enter(self._cfg, - self._backend, + self._rewriter_manager.symbolic_rewriter.enter(self._cfg, self._env, **self._kwargs) def exit(self): diff --git a/mmdeploy/core/rewriters/rewriter_utils.py b/mmdeploy/core/rewriters/rewriter_utils.py index 701078144a..a80fd84738 100644 --- a/mmdeploy/core/rewriters/rewriter_utils.py +++ b/mmdeploy/core/rewriters/rewriter_utils.py @@ -1,8 +1,11 @@ # Copyright (c) OpenMMLab. All rights reserved. import inspect -from typing import Any, Callable, Dict, List, Optional, Tuple +import warnings +from abc import ABCMeta, abstractmethod +from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from mmdeploy.utils.constants import Backend +import mmdeploy +from mmdeploy.utils.constants import IR, Backend def eval_with_import(path: str) -> Any: @@ -56,6 +59,127 @@ def import_function(path: str) -> Tuple[Callable, Optional[type]]: return obj, None +def collect_env(backend: Backend, ir: IR, **kwargs) -> Dict: + """Collect current environment information, including backend, ir, codebase + version, etc. Rewriters will be checked according to the env info. + + Args: + backend (Backend): Current backend. + ir (IR): Current IR. + + Returns: + Dict: Record the value of Backend and IR as well as the versions of + libraries. + """ + from mmdeploy.utils import get_backend_version, get_codebase_version + env = dict(backend=backend, ir=ir) + env['mmdeploy'] = mmdeploy.__version__ + env.update(get_backend_version()) + env.update(get_codebase_version()) + env.update(kwargs) + return env + + +class Checker(metaclass=ABCMeta): + """The interface for checking whether a rewriter is valid.""" + + def __init__(self): + pass + + @abstractmethod + def check(self, env: Dict) -> bool: + """Check if the rewriter is valid according to environment. + + Args: + env (Dict): The backend, IR info and version info. + """ + pass + + +class BackendChecker(Checker): + """Checker that determines which backend the rewriter must run on. + + Args: + required_backend (Backend): The backend on which the rewriter + will be activated. + """ + + def __init__(self, required_backend: Backend): + super().__init__() + self.required_backend = required_backend + + def check(self, env: Dict) -> bool: + """Check if the rewriter is valid according to backend.
+ + Args: + env (Dict): The backend, IR info and version info. + """ + return env['backend'] == self.required_backend + + +class IRChecker(Checker): + """Checker that determines which IR the rewriter must run on. + + Args: + required_ir (IR): The IR on which the rewriter will be activated. + """ + + def __init__(self, required_ir: IR): + super().__init__() + self.required_ir = required_ir + + def check(self, env: Dict) -> bool: + """Check if the rewriter is valid according to IR. + + Args: + env (Dict): The backend, IR info and version info. + """ + return env['ir'] == self.required_ir + + +class LibVersionChecker(Checker): + """Checker that determines whether the installed library version satisfies + the requirement. + + Args: + lib (str): The name of library. + min_version (str | None): The lowest library version the rewriter + requires. Default to `None`. + max_version (str | None): The highest library version the rewriter + supports. Default to `None`. + """ + + def __init__(self, + lib: str, + min_version: Optional[str] = None, + max_version: Optional[str] = None): + super().__init__() + self.lib = lib + self.min_version = min_version + self.max_version = max_version + + def check(self, env: Dict) -> bool: + """Check if the rewriter is valid according to library version. + + Args: + env (Dict): The backend, IR info and version info. + """ + # If the library has not been installed + if env[self.lib] is None: + return False + + from packaging import version + valid = True + # The version should be no less than the min version and no greater + # than the max version. + if self.min_version is not None: + if version.parse(env[self.lib]) < version.parse(self.min_version): + valid = False + if self.max_version is not None: + if version.parse(env[self.lib]) > version.parse(self.max_version): + valid = False + return valid + + class RewriterRegistry: """A registry that recoreds rewrite objects. @@ -75,58 +199,128 @@ >>> records = FUNCTION_REGISTRY.get_record("default") """ - # TODO: replace backend string with "Backend" constant def __init__(self): self._rewrite_records = dict() - self.add_backend(Backend.DEFAULT.value) - - def _check_backend(self, backend: str): - """Check if a backend has been supported.""" - if backend not in self._rewrite_records: - raise Exception('Backend is not supported by registry.') - - def add_backend(self, backend: str): - """Add a backend dictionary.""" - if backend not in self._rewrite_records: - self._rewrite_records[backend] = dict() - - def get_records(self, backend: str) -> List: - """Get all registered records in record table.""" - self._check_backend(backend) - - if backend != Backend.DEFAULT.value: - # Update dict A with dict B. - # Then convert the result dict to a list, while keeping the order - # of A and B: the elements only belong to B should alwarys come - after the elements only belong to A. - The complexity is O(n + m). - dict_a = self._rewrite_records[Backend.DEFAULT.value] - dict_b = self._rewrite_records[backend] - records = [] - for k, v in dict_a.items(): - if k in dict_b: - records.append((k, dict_b[k])) + + def get_records(self, env: Dict) -> List: + """Get all registered records that are valid in the given environment + from record table. + + If the backend and IR of a rewriter are set to 'default', then the + rewriter is regarded as the default rewriter. The default rewriter will be + activated only when all other rewriters are not valid.
If + multiple rewriters are valid (except the default rewriter), we will + activate the first one (the order is determined by the time when the + rewriters are loaded). + + Args: + env (dict): Environment dictionary that includes backend, IR, + codebase version, etc. + + Returns: + List: A list that includes valid records. + """ + default_records = list() + records = list() + + for origin_function, rewriter_records in self._rewrite_records.items(): + default_rewriter = None + final_rewriter = None + for record in rewriter_records: + # Get the checkers of current rewriter + checkers: List[Checker] = record['_checkers'] + + # Check if the rewriter is default rewriter + if len(checkers) == 0: + # Process the default rewriter separately + if default_rewriter is None: + default_rewriter = record + else: + warnings.warn( + 'Detect multiple valid rewriters for ' + f'{origin_function}, use the first rewriter.') else: - records.append((k, v)) - for k, v in dict_b.items(): - if k not in dict_a: - records.append((k, v)) - else: - records = list( - self._rewrite_records[Backend.DEFAULT.value].items()) - return records - - def _register(self, name: str, backend: str, **kwargs): + # Check if the rewriter is valid. + # The rewriter is valid only if all its checks pass + valid = True + for checker in checkers: + if not checker.check(env): + valid = False + break + + if valid: + # Check if there are multiple valid rewriters + if final_rewriter is not None: + warnings.warn( + 'Detect multiple valid rewriters for ' + f'{origin_function}, use the first rewriter.') + else: + final_rewriter = record + + # Append the final rewriter. + # If there is no valid rewriter, fall back to the default rewriter + if final_rewriter is not None: + records.append((origin_function, final_rewriter)) + elif default_rewriter is not None: + default_records.append((origin_function, default_rewriter)) + + # Make the default records come to the front of the list because we may + # want the non-default records to override them. + return default_records + records + + def _register(self, name: str, backend: Backend, ir: IR, + extra_checkers: List[Checker], **kwargs): """The implementation of register.""" - self._check_backend(backend) - self._rewrite_records[backend][name] = kwargs - def register_object(self, name: str, backend: str, **kwargs) -> Callable: - """The decorator to register an object.""" - self._check_backend(backend) + # Merge checkers to kwargs + record_dict = kwargs + + # Try to create a checker according to 'backend' field + if backend != Backend.DEFAULT: + extra_checkers.append(BackendChecker(backend)) + + # Try to create a checker according to 'ir' field + if ir != IR.DEFAULT: + extra_checkers.append(IRChecker(ir)) + + record_dict['_checkers'] = extra_checkers + + # There may be multiple rewriters of a function/module. We use a list + # to store the rewriters of a function/module. + if name not in self._rewrite_records: + self._rewrite_records[name] = list() + self._rewrite_records[name].append(record_dict) + + def register_object(self, + name: str, + backend: str, + ir: IR, + extra_checkers: Optional[Union[Checker, + List[Checker]]] = None, + **kwargs) -> Callable: + """The decorator to register an object. + + Args: + name (str): The import path to access the function/module. + backend (str): The backend on which the rewriter will be activated. + ir (IR): The IR on which the rewriter will be activated. + extra_checkers (None | Checker | List[Checker]): Other requirements + for the rewriters. Default to `None`.
+ + Returns: + Callable: The decorator. + """ + + if extra_checkers is None: + extra_checkers = [] + elif isinstance(extra_checkers, Checker): + extra_checkers = [extra_checkers] + + backend = Backend.get(backend) def decorator(object): - self._register(name, backend, _object=object, **kwargs) + self._register( + name, backend, ir, extra_checkers, _object=object, **kwargs) return object return decorator diff --git a/mmdeploy/core/rewriters/symbolic_rewriter.py b/mmdeploy/core/rewriters/symbolic_rewriter.py index c9c16d071d..dd47cd8d58 100644 --- a/mmdeploy/core/rewriters/symbolic_rewriter.py +++ b/mmdeploy/core/rewriters/symbolic_rewriter.py @@ -1,13 +1,14 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Callable, Dict, Optional, Sequence +from typing import Callable, Dict, List, Optional, Sequence, Union from torch.autograd import Function from torch.onnx.symbolic_helper import parse_args from torch.onnx.symbolic_registry import _registry as pytorch_registry from torch.onnx.symbolic_registry import register_op -from mmdeploy.utils import Backend, get_root_logger -from .rewriter_utils import ContextCaller, RewriterRegistry, eval_with_import +from mmdeploy.utils import IR, Backend, get_root_logger +from .rewriter_utils import (Checker, ContextCaller, RewriterRegistry, + eval_with_import) class SymbolicRewriter: @@ -35,25 +36,27 @@ class SymbolicRewriter: def __init__(self) -> None: self._registry = RewriterRegistry() - def add_backend(self, backend: str): - """Add a backend by calling the _registry.add_backend.""" - self._registry.add_backend(backend) - def register_symbolic(self, func_name: str, backend: str = Backend.DEFAULT.value, is_pytorch: bool = False, arg_descriptors: Optional[Sequence[str]] = None, + ir: IR = IR.DEFAULT, + extra_checkers: Optional[Union[ + Checker, List[Checker]]] = None, **kwargs) -> Callable: """The decorator of the custom symbolic. Args: func_name (str): The function name/path to override the symbolic. - backend (str): The inference engine name. + backend (str): The backend on which the rewriter will be activated. is_pytorch (bool): Enable this flag if func_name is the name of \ a pytorch builtin function. arg_descriptors (Sequence[str]): The argument descriptors of the \ symbol. + ir (IR): The IR on which the rewriter will be activated. + extra_checkers (Checker | List[Checker] | None): Other requirements + defined by Checker. Returns: Callable: The process of registered symbolic. @@ -61,18 +64,20 @@ def register_symbolic(self, return self._registry.register_object( func_name, backend, + ir, + extra_checkers, is_pytorch=is_pytorch, arg_descriptors=arg_descriptors, **kwargs) def enter(self, cfg: Dict = dict(), - backend: str = Backend.DEFAULT.value, + env: Dict = dict(), opset: int = 11, **kwargs): """The implementation of symbolic register.""" # Get current records - symbolic_records = self._registry.get_records(backend) + symbolic_records = self._registry.get_records(env) self._pytorch_symbolic = list() self._extra_symbolic = list() diff --git a/mmdeploy/pytorch/functions/interpolate.py b/mmdeploy/pytorch/functions/interpolate.py index 2d9632ea30..2b619b9535 100644 --- a/mmdeploy/pytorch/functions/interpolate.py +++ b/mmdeploy/pytorch/functions/interpolate.py @@ -90,7 +90,8 @@ def forward(g, input, scale_factor, align_corners): 'tensor. Which is not available for custom ops.
Computed scale' '_factor might be the right way to get final shape.') scale_factor = [ - s_out / s_in for s_out, s_in in zip(size, input_size[2:]) + float(s_out / s_in) + for s_out, s_in in zip(size, input_size[2:]) ] return BicubicInterpolate.apply(input, scale_factor, align_corners) else: diff --git a/mmdeploy/pytorch/ops/__init__.py b/mmdeploy/pytorch/ops/__init__.py index 48f9c90137..0608aadf79 100644 --- a/mmdeploy/pytorch/ops/__init__.py +++ b/mmdeploy/pytorch/ops/__init__.py @@ -3,6 +3,7 @@ adaptive_avg_pool2d__default, adaptive_avg_pool3d__default) from .grid_sampler import grid_sampler__default +from .hardsigmoid import hardsigmoid__default from .instance_norm import instance_norm__tensorrt from .lstm import generic_rnn__ncnn from .squeeze import squeeze__default @@ -10,5 +11,6 @@ __all__ = [ 'adaptive_avg_pool1d__default', 'adaptive_avg_pool2d__default', 'adaptive_avg_pool3d__default', 'grid_sampler__default', - 'instance_norm__tensorrt', 'generic_rnn__ncnn', 'squeeze__default' + 'hardsigmoid__default', 'instance_norm__tensorrt', 'generic_rnn__ncnn', + 'squeeze__default' ] diff --git a/mmdeploy/pytorch/ops/hardsigmoid.py b/mmdeploy/pytorch/ops/hardsigmoid.py new file mode 100644 index 0000000000..a4d14173ed --- /dev/null +++ b/mmdeploy/pytorch/ops/hardsigmoid.py @@ -0,0 +1,12 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Modified from: +# https://github.com/pytorch/pytorch/blob/9ade03959392e5a90b74261012de1d806cab2253/torch/onnx/symbolic_opset9.py +from mmdeploy.core import SYMBOLIC_REWRITER + + +@SYMBOLIC_REWRITER.register_symbolic( + 'hardsigmoid', is_pytorch=True, arg_descriptors=['v']) +def hardsigmoid__default(ctx, g, self): + """Support export of hardsigmoid. This rewrite enables exporting + hardsigmoid for torch<=1.8.2.""" + return g.op('HardSigmoid', self, alpha_f=1 / 6) diff --git a/mmdeploy/utils/__init__.py b/mmdeploy/utils/__init__.py index 03543f9d5f..4847ba7b09 100644 --- a/mmdeploy/utils/__init__.py +++ b/mmdeploy/utils/__init__.py @@ -6,9 +6,10 @@ get_model_inputs, get_onnx_config, get_partition_config, get_task_type, is_dynamic_batch, is_dynamic_shape, load_config) -from .constants import SDK_TASK_MAP, Backend, Codebase, Task +from .constants import IR, SDK_TASK_MAP, Backend, Codebase, Task from .device import parse_cuda_device_id, parse_device_id -from .utils import get_root_logger, target_wrapper +from .env import get_backend_version, get_codebase_version, get_library_version +from .utils import get_file_path, get_root_logger, target_wrapper __all__ = [ 'is_dynamic_batch', 'is_dynamic_shape', 'get_task_type', 'get_codebase', @@ -18,5 +19,6 @@ 'get_model_inputs', 'cfg_apply_marks', 'get_input_shape', 'parse_device_id', 'parse_cuda_device_id', 'get_codebase_config', 'get_backend_config', 'get_root_logger', 'get_dynamic_axes', - 'target_wrapper', 'SDK_TASK_MAP' + 'target_wrapper', 'SDK_TASK_MAP', 'get_library_version', + 'get_codebase_version', 'get_backend_version', 'IR', 'get_file_path' ] diff --git a/mmdeploy/utils/config_utils.py b/mmdeploy/utils/config_utils.py index f1842a6672..3aab4e29e4 100644 --- a/mmdeploy/utils/config_utils.py +++ b/mmdeploy/utils/config_utils.py @@ -4,6 +4,7 @@ import mmcv from .constants import Backend, Codebase, Task +from .utils import deprecate def load_config(*args) -> List[mmcv.Config]: @@ -126,6 +127,7 @@ return ir_config +@deprecate(dst_obj=get_ir_config) def get_onnx_config(deploy_cfg: Union[str, mmcv.Config]) -> Dict: """Get the onnx
parameters in export() from config. @@ -135,7 +137,6 @@ def get_onnx_config(deploy_cfg: Union[str, mmcv.Config]) -> Dict: Returns: Dict: The config dictionary of onnx parameters """ - onnx_config = get_ir_config(deploy_cfg=deploy_cfg) ir_type = onnx_config.get('type', None) assert ir_type is None or ir_type == 'onnx', 'Expect IR type is ONNX,'\ @@ -193,6 +194,10 @@ bool: Is config set dynamic shape (axis 2 and 3). """ + # Always dynamic for exporting torchscript + if get_backend(deploy_cfg) == Backend.TORCHSCRIPT: + return True + deploy_cfg = load_config(deploy_cfg)[0] ir_config = get_ir_config(deploy_cfg) @@ -353,18 +358,21 @@ Dictionary with dynamic axes. """ deploy_cfg = load_config(deploy_cfg)[0] + ir_config = get_ir_config(deploy_cfg) + + # TODO onnx will be deprecated in the future onnx_config = deploy_cfg.get('onnx_config', None) - if onnx_config is None: + if onnx_config is None and ir_config == {}: raise KeyError( 'Field \'onnx_config\' was not found in \'deploy_cfg\'.') - dynamic_axes = onnx_config.get('dynamic_axes', None) + dynamic_axes = ir_config.get('dynamic_axes', None) if dynamic_axes and not isinstance(dynamic_axes, Dict): if axes_names is None: axes_names = [] - input_names = onnx_config.get('input_names', None) + input_names = ir_config.get('input_names', None) if input_names: axes_names += input_names - output_names = onnx_config.get('output_names', None) + output_names = ir_config.get('output_names', None) if output_names: axes_names += output_names if not axes_names: diff --git a/mmdeploy/utils/constants.py b/mmdeploy/utils/constants.py index da07cb28e7..c36e610fea 100644 --- a/mmdeploy/utils/constants.py +++ b/mmdeploy/utils/constants.py @@ -37,6 +37,13 @@ class Codebase(AdvancedEnum): MMPOSE = 'mmpose' +class IR(AdvancedEnum): + """Define intermediate representation enumerations.""" + ONNX = 'onnx' + TORCHSCRIPT = 'torchscript' + DEFAULT = 'default' + + class Backend(AdvancedEnum): """Define backend enumerations.""" PYTORCH = 'pytorch' @@ -46,6 +53,7 @@ class Backend(AdvancedEnum): NCNN = 'ncnn' OPENVINO = 'openvino' SDK = 'sdk' + TORCHSCRIPT = 'torchscript' DEFAULT = 'default' diff --git a/mmdeploy/utils/env.py b/mmdeploy/utils/env.py new file mode 100644 index 0000000000..8cc2cbd3d5 --- /dev/null +++ b/mmdeploy/utils/env.py @@ -0,0 +1,49 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import importlib + +from mmdeploy.utils import Codebase + + +def get_library_version(lib): + """Try to get the version of a library if it has been installed. + + Args: + lib (str): The name of library. + + Returns: + None | str: If the library has been installed, return version. + """ + try: + lib = importlib.import_module(lib) + except Exception: + version = None + else: + version = lib.__version__ + + return version + + +def get_codebase_version(): + """Get the version dictionary of all supported codebases. + + Returns: + Dict: The name and the version of supported codebases. + """ + version_dict = dict() + for enum in Codebase: + codebase = enum.value + version_dict[codebase] = get_library_version(codebase) + return version_dict + + +def get_backend_version(): + """Get the version dictionary of the supported backends. + + Returns: + Dict: The names and versions of the supported backends.
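+ + Examples: + >>> # illustrative output; the actual values depend on which + >>> # backend libraries are installed locally + >>> get_backend_version() + {'tensorrt': None, 'onnxruntime': '1.8.1', 'ncnn': None}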
+ """ + backend_library_list = ['tensorrt', 'onnxruntime', 'ncnn'] + version_dict = dict() + for backend in backend_library_list: + version_dict[backend] = get_library_version(backend) + return version_dict diff --git a/mmdeploy/utils/export_info.py b/mmdeploy/utils/export_info.py index 8c5466f892..2eaf38da1f 100644 --- a/mmdeploy/utils/export_info.py +++ b/mmdeploy/utils/export_info.py @@ -1,14 +1,14 @@ # Copyright (c) OpenMMLab. All rights reserved. import importlib +import re from typing import Dict, List, Tuple, Union import mmcv from mmdeploy.apis import build_task_processor from mmdeploy.utils import (Backend, Task, get_backend, get_codebase, - get_common_config, get_onnx_config, - get_root_logger, get_task_type, is_dynamic_batch, - load_config) + get_common_config, get_ir_config, get_root_logger, + get_task_type, is_dynamic_batch, load_config) from mmdeploy.utils.constants import SDK_TASK_MAP as task_map @@ -89,12 +89,25 @@ def get_models(deploy_cfg: Union[str, mmcv.Config], """ name, _ = get_model_name_customs(deploy_cfg, model_cfg, work_dir) precision = 'FP32' - onnx_name = get_onnx_config(deploy_cfg)['save_file'] - net = onnx_name + ir_name = get_ir_config(deploy_cfg)['save_file'] + net = ir_name weights = '' backend = get_backend(deploy_cfg=deploy_cfg) + + def replace_suffix(file_name: str, dst_suffix: str) -> str: + """Replace the suffix to the destination one. + + Args: + file_name (str): The file name to be operated. + dst_suffix (str): The destination suffix. + + Return: + str: The file name of which the suffix has been replaced. + """ + return re.sub(r'\.[a-z]+', dst_suffix, file_name) + if backend == Backend.TENSORRT: - net = onnx_name.replace('.onnx', '.engine') + net = replace_suffix(ir_name, '.engine') common_cfg = get_common_config(deploy_cfg) fp16_mode = common_cfg.get('fp16_mode', False) int8_mode = common_cfg.get('int8_mode', False) @@ -104,15 +117,15 @@ def get_models(deploy_cfg: Union[str, mmcv.Config], precision = 'INT8' elif backend == Backend.PPLNN: precision = 'FP16' - weights = onnx_name.replace('.onnx', '.json') - net = onnx_name + weights = replace_suffix(ir_name, '.json') + net = ir_name elif backend == Backend.OPENVINO: - net = onnx_name.replace('.onnx', '.xml') - weights = onnx_name.replace('.onnx', '.bin') + net = replace_suffix(ir_name, '.xml') + weights = replace_suffix(ir_name, '.bin') elif backend == Backend.NCNN: - net = onnx_name.replace('.onnx', '.param') - weights = onnx_name.replace('.onnx', '.bin') - elif backend == Backend.ONNXRUNTIME: + net = replace_suffix(ir_name, '.param') + weights = replace_suffix(ir_name, '.bin') + elif backend in [Backend.ONNXRUNTIME, Backend.TORCHSCRIPT]: pass else: raise NotImplementedError(f'Not supported backend: {backend.value}.') @@ -149,8 +162,8 @@ def get_inference_info(deploy_cfg: mmcv.Config, model_cfg: mmcv.Config, module = 'Net' input = ['prep_output'] output = ['infer_output'] - onnx_config = get_onnx_config(deploy_cfg) - input_names = onnx_config.get('input_names', None) + ir_config = get_ir_config(deploy_cfg) + input_names = ir_config.get('input_names', None) input_name = input_names[0] if input_names else 'input' input_map = dict(img=input_name) return dict( @@ -320,14 +333,14 @@ def get_detail(deploy_cfg: mmcv.Config, model_cfg: mmcv.Config, codebase['pth'] = pth codebase['config'] = model_cfg.filename codebase_config = deploy_cfg.get('codebase_config', dict()) - onnx_config = get_onnx_config(deploy_cfg) + ir_config = get_ir_config(deploy_cfg) backend_config = deploy_cfg.get('backend_config', dict()) 
calib_config = deploy_cfg.get('calib_config', dict()) return dict( version=version, codebase=codebase, codebase_config=codebase_config, - onnx_config=onnx_config, + onnx_config=ir_config, backend_config=backend_config, calib_config=calib_config) diff --git a/mmdeploy/utils/test.py b/mmdeploy/utils/test.py index 3fd7457f0c..c912f14821 100644 --- a/mmdeploy/utils/test.py +++ b/mmdeploy/utils/test.py @@ -14,7 +14,7 @@ import mmdeploy.codebase # noqa: F401,F403 from mmdeploy.core import RewriterContext, patch_model -from mmdeploy.utils import (Backend, get_backend, get_dynamic_axes, +from mmdeploy.utils import (IR, Backend, get_backend, get_dynamic_axes, get_ir_config, get_onnx_config) @@ -93,6 +93,8 @@ def check_backend(backend: Backend, require_plugin: bool = False): from mmdeploy.apis.ncnn import is_plugin_available elif backend == Backend.OPENVINO: from mmdeploy.apis.openvino import is_available + elif backend == Backend.TORCHSCRIPT: + from mmdeploy.backend.torchscript import ops_available as is_available else: warnings.warn('The backend checker is not available') return @@ -375,14 +377,40 @@ def get_onnx_model(wrapped_model: nn.Module, return onnx_file_path -def get_backend_outputs(onnx_file_path: str, +def get_ts_model(wrapped_model: nn.Module, + model_inputs: Dict[str, Union[Tuple, List, torch.Tensor]], + deploy_cfg: mmcv.Config) -> str: + """To get the path to the TorchScript model after export. + + Args: + wrapped_model (nn.Module): The input model. + model_inputs (dict): Inputs for model. + deploy_cfg (mmcv.Config): Deployment config. + + Returns: + str: The path to the TorchScript model file. + """ + ir_file_path = tempfile.NamedTemporaryFile(suffix='.pt').name + backend = get_backend(deploy_cfg) + patched_model = patch_model( + wrapped_model, cfg=deploy_cfg, backend=backend.value) + + from mmdeploy.apis.pytorch2torchscript import torch2torchscript_impl + torch2torchscript_impl( + patched_model, [v for _, v in model_inputs.items()], + deploy_cfg=deploy_cfg, + output_file=ir_file_path) + return ir_file_path + + +def get_backend_outputs(ir_file_path: str, model_inputs: Dict[str, Union[Tuple, List, torch.Tensor]], deploy_cfg: mmcv.Config) -> Union[Any, None]: """To get backend outputs of model. Args: - onnx_file_path (str): The path to the ONNX file. + ir_file_path (str): The path to the IR file. model_inputs (dict): Inputs for model. deploy_cfg (mmcv.Config): Deployment config.
@@ -408,7 +436,7 @@ def get_backend_outputs(onnx_file_path: str, trt_file_path, 0, deploy_cfg=deploy_cfg, - onnx_model=onnx_file_path) + onnx_model=ir_file_path) backend_files = [trt_file_path] for k, v in model_inputs.items(): model_inputs[k] = model_inputs[k].cuda() @@ -441,7 +469,7 @@ def get_backend_outputs(onnx_file_path: str, backend_feats[input_names[i]] = feature_list[i] else: backend_feats[str(i)] = feature_list[i] - backend_files = [onnx_file_path] + backend_files = [ir_file_path] device = 'cpu' elif backend == Backend.NCNN: import mmdeploy.apis.ncnn as ncnn_apis @@ -449,8 +477,8 @@ def get_backend_outputs(onnx_file_path: str, return None work_dir = tempfile.TemporaryDirectory().name param_path, bin_path = ncnn_apis.get_output_model_file( - onnx_file_path, work_dir) - ncnn_apis.onnx2ncnn(onnx_file_path, param_path, bin_path) + ir_file_path, work_dir) + ncnn_apis.onnx2ncnn(ir_file_path, param_path, bin_path) backend_files = [param_path, bin_path] backend_feats = flatten_model_inputs device = 'cpu' @@ -461,25 +489,34 @@ def get_backend_outputs(onnx_file_path: str, return None openvino_work_dir = tempfile.TemporaryDirectory().name openvino_file_path = openvino_apis.get_output_model_file( - onnx_file_path, openvino_work_dir) + ir_file_path, openvino_work_dir) input_info = { name: value.shape for name, value in flatten_model_inputs.items() } - openvino_apis.onnx2openvino(input_info, output_names, onnx_file_path, + openvino_apis.onnx2openvino(input_info, output_names, ir_file_path, openvino_work_dir) backend_files = [openvino_file_path] backend_feats = flatten_model_inputs device = 'cpu' + elif backend == Backend.DEFAULT: return None + elif backend == Backend.TORCHSCRIPT: + backend_files = [ir_file_path] + device = 'cpu' + backend_feats = [v for _, v in model_inputs.items()] else: raise NotImplementedError( f'Unimplemented backend type: {backend.value}') from mmdeploy.codebase.base import BaseBackendModel - backend_model = BaseBackendModel._build_wrapper(backend, backend_files, - device, output_names) + backend_model = BaseBackendModel._build_wrapper( + backend, + backend_files, + device, + input_names=input_names, + output_names=output_names) with torch.no_grad(): backend_outputs = backend_model(backend_feats) backend_outputs = backend_model.output_to_list(backend_outputs) @@ -511,11 +548,15 @@ def get_rewrite_outputs(wrapped_model: nn.Module, cfg=deploy_cfg, backend=backend.value, opset=11), torch.no_grad(): ctx_outputs = wrapped_model(**model_inputs) - onnx_file_path = get_onnx_model(wrapped_model, model_inputs, deploy_cfg) + ir_type = get_ir_config(deploy_cfg).get('type', None) + if ir_type == IR.TORCHSCRIPT.value: + ir_file_path = get_ts_model(wrapped_model, model_inputs, deploy_cfg) + else: # TODO onnx as default, make it strict when more IR types involved + ir_file_path = get_onnx_model(wrapped_model, model_inputs, deploy_cfg) backend_outputs = None if run_with_backend: - backend_outputs = get_backend_outputs(onnx_file_path, model_inputs, + backend_outputs = get_backend_outputs(ir_file_path, model_inputs, deploy_cfg) if backend_outputs is None: diff --git a/mmdeploy/utils/utils.py b/mmdeploy/utils/utils.py index 9917dd4770..10d6d02bd0 100644 --- a/mmdeploy/utils/utils.py +++ b/mmdeploy/utils/utils.py @@ -1,8 +1,10 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+import glob import logging +import os import sys import traceback -from typing import Callable, Optional +from typing import Callable, Optional, Union import torch.multiprocessing as mp from mmcv.utils import get_logger @@ -56,3 +58,68 @@ def get_root_logger(log_file=None, log_level=logging.INFO) -> logging.Logger: name='mmdeploy', log_file=log_file, log_level=log_level) return logger + + +def deprecate(status: str = 'future', + dst_obj: Optional[Union[object, str]] = None, + msg: str = '', + *args, + **kwargs) -> None: + """Deprecate a function or a class. + + Args: + status (str, optional): The status of the function or class. + Defaults to 'future'. + dst_obj (str, object, optional): The object that will replace + the original one. Defaults to None. + msg (str): Additional message to be printed. + + Examples: + >>> from math import ceil + >>> from mmdeploy.utils.utils import deprecate + >>> @deprecate(status='past', dst_obj=ceil, msg='') + >>> def my_ceil(num): + >>> num = num if(num==int(num)) else int(num) + 1 + >>> return num + """ + logger = get_root_logger() + + def _register(src_obj): + + def fun(*args, **kwargs): + if status == 'future': + logger.warning( + f'DeprecationWarning: {src_obj.__name__} will be ' + f'deprecated in the future. {msg}') + elif status == 'past': + assert dst_obj is not None, ( + 'for deprecated object, there must be a destination object') + logger.warning( + f'DeprecationWarning: {src_obj.__name__} was deprecated,' + f' use {dst_obj.__name__} instead. {msg}') + else: + raise KeyError(f'Unexpected key {status}') + result = src_obj(*args, **kwargs) + return result + + return fun + + return _register + + +def get_file_path(prefix, candidates) -> str: + """Search for a file in the candidates. + + Args: + prefix (str): Prefix of the paths. + candidates (str): Candidate paths. + Returns: + str: file path or '' if not found + """ + for candidate in candidates: + wildcard = os.path.abspath(os.path.join(prefix, candidate)) + paths = glob.glob(wildcard) + if paths: + lib_path = paths[0] + return lib_path + return '' diff --git a/mmdeploy/version.py b/mmdeploy/version.py index 106378f190..821f44df9c 100644 --- a/mmdeploy/version.py +++ b/mmdeploy/version.py @@ -1,7 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. from typing import Tuple -__version__ = '0.2.0' +__version__ = '0.4.0' short_version = __version__ diff --git a/tests/test_apis/test_onnx2ncnn.py b/tests/test_apis/test_onnx2ncnn.py index 57199e37ff..8073b77548 100644 --- a/tests/test_apis/test_onnx2ncnn.py +++ b/tests/test_apis/test_onnx2ncnn.py @@ -1,3 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. import os.path as osp import tempfile diff --git a/tests/test_apis/test_torch2onnx.py b/tests/test_apis/test_torch2onnx.py index 16dc2b0d50..349a9c642a 100644 --- a/tests/test_apis/test_torch2onnx.py +++ b/tests/test_apis/test_torch2onnx.py @@ -53,9 +53,8 @@ def get_deploy_cfg(input_name, output_name, dynamic_axes): input_names=[input_name], output_names=[output_name], input_shape=None), - codebase_config=dict(type='mmedit', task=''), # useless - backend_config=dict(type='onnxruntime') # useless - )) + codebase_config=dict(type='mmedit', task=''), + backend_config=dict(type='onnxruntime'))) @pytest.mark.parametrize('input_name', [input_name]) diff --git a/tests/test_apis/test_torch2torchscript.py b/tests/test_apis/test_torch2torchscript.py new file mode 100644 index 0000000000..4bb1c5c998 --- /dev/null +++ b/tests/test_apis/test_torch2torchscript.py @@ -0,0 +1,87 @@ +# Copyright (c) OpenMMLab.
All rights reserved. +import importlib +import os.path as osp +import tempfile + +import mmcv +import pytest + +from mmdeploy.apis import torch2torchscript +from mmdeploy.utils import IR, Backend +from mmdeploy.utils.test import get_random_name + +ts_file = tempfile.NamedTemporaryFile(suffix='.pt').name +input_name = get_random_name() +output_name = get_random_name() + + +def get_deploy_cfg(input_name, output_name): + return mmcv.Config( + dict( + ir_config=dict( + type=IR.TORCHSCRIPT.value, + input_names=[input_name], + output_names=[output_name], + input_shape=None), + codebase_config=dict(type='mmedit', task='SuperResolution'), + backend_config=dict(type=Backend.TORCHSCRIPT.value))) + + +def get_model_cfg(): + return mmcv.Config( + dict( + model=dict( + pretrained=None, + type='BasicRestorer', + generator=dict( + type='RRDBNet', + in_channels=3, + out_channels=3, + mid_channels=64, + num_blocks=23, + growth_channels=32), + pixel_loss=dict( + type='L1Loss', loss_weight=1.0, reduction='mean')), + test_cfg=dict(metrics='PSNR'), + test_pipeline=[ + dict( + type='LoadImageFromFile', + io_backend='disk', + key='lq', + flag='unchanged'), + dict( + type='LoadImageFromFile', + io_backend='disk', + key='gt', + flag='unchanged'), + dict(type='RescaleToZeroOne', keys=['lq', 'gt']), + dict( + type='Normalize', + keys=['lq', 'gt'], + mean=[0, 0, 0], + std=[1, 1, 1], + to_rgb=True), + dict( + type='Collect', + keys=['lq', 'gt'], + meta_keys=['lq_path', 'lq_path']), + dict(type='ImageToTensor', keys=['lq', 'gt']) + ])) + + +@pytest.mark.parametrize('input_name', [input_name]) +@pytest.mark.parametrize('output_name', [output_name]) +@pytest.mark.skipif( + not importlib.util.find_spec('mmedit'), reason='requires mmedit') +def test_torch2torchscript(input_name, output_name): + import numpy as np + deploy_cfg = get_deploy_cfg(input_name, output_name) + torch2torchscript( + np.random.rand(8, 8, 3), + '', + ts_file, + deploy_cfg, + model_cfg=get_model_cfg(), + device='cpu') + + assert osp.exists(ts_file) diff --git a/tests/test_backend/test_wrapper.py b/tests/test_backend/test_wrapper.py index f42d915131..b177a2ee58 100644 --- a/tests/test_backend/test_wrapper.py +++ b/tests/test_backend/test_wrapper.py @@ -10,6 +10,7 @@ from mmdeploy.utils.test import check_backend onnx_file = tempfile.NamedTemporaryFile(suffix='.onnx').name +ts_file = tempfile.NamedTemporaryFile(suffix='.pt').name test_img = torch.rand(1, 3, 8, 8) output_names = ['output'] input_names = ['input'] @@ -44,6 +45,18 @@ def generate_onnx_file(): dynamic_axes=None) +@pytest.fixture(autouse=True, scope='module') +def generate_torchscript_file(): + import mmcv + + from mmdeploy.apis import torch2torchscript_impl + deploy_cfg = mmcv.Config( + {'backend_config': dict(type=Backend.TORCHSCRIPT.value)}) + with torch.no_grad(): + torch2torchscript_impl(model, torch.rand(1, 3, 8, 8), deploy_cfg, + ts_file) + + def onnx2backend(backend, onnx_file): if backend == Backend.TENSORRT: from mmdeploy.backend.tensorrt import (create_trt_engine, @@ -107,6 +120,11 @@ def create_wrapper(backend, model_files): from mmdeploy.backend.openvino import OpenVINOWrapper openvino_model = OpenVINOWrapper(model_files, output_names) return openvino_model + elif backend == Backend.TORCHSCRIPT: + from mmdeploy.backend.torchscript import TorchscriptWrapper + torchscript_model = TorchscriptWrapper( + model_files, input_names=input_names, output_names=output_names) + return torchscript_model else: raise NotImplementedError(f'Unknown backend type: {backend.value}') @@ -134,20 +152,26 
@@ def run_wrapper(backend, wrapper, input): results = wrapper({'input': input})['output'] results = results.detach().cpu() return results + elif backend == Backend.TORCHSCRIPT: + results = wrapper({'input': input})['output'] + return results else: raise NotImplementedError(f'Unknown backend type: {backend.value}') ALL_BACKEND = [ Backend.TENSORRT, Backend.ONNXRUNTIME, Backend.PPLNN, Backend.NCNN, - Backend.OPENVINO + Backend.OPENVINO, Backend.TORCHSCRIPT ] @pytest.mark.parametrize('backend', ALL_BACKEND) def test_wrapper(backend): check_backend(backend, True) - model_files = onnx2backend(backend, onnx_file) + if backend == Backend.TORCHSCRIPT: + model_files = ts_file + else: + model_files = onnx2backend(backend, onnx_file) assert model_files is not None wrapper = create_wrapper(backend, model_files) assert wrapper is not None diff --git a/tests/test_codebase/test_mmdet/test_mmdet_models.py b/tests/test_codebase/test_mmdet/test_mmdet_models.py index 9cfe6e83d9..f299e18641 100644 --- a/tests/test_codebase/test_mmdet/test_mmdet_models.py +++ b/tests/test_codebase/test_mmdet/test_mmdet_models.py @@ -11,6 +11,7 @@ from mmdeploy.codebase import import_codebase from mmdeploy.utils import Backend, Codebase +from mmdeploy.utils.config_utils import get_ir_config from mmdeploy.utils.test import (WrapModel, check_backend, get_model_outputs, get_rewrite_outputs) @@ -157,6 +158,31 @@ def get_single_roi_extractor(): return model +def get_gfl_head_model(): + test_cfg = mmcv.Config( + dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) + anchor_generator = dict( + type='AnchorGenerator', + scales_per_octave=1, + octave_base_scale=8, + ratios=[1.0], + strides=[8, 16, 32, 64, 128]) + from mmdet.models.dense_heads import GFLHead + model = GFLHead( + num_classes=3, + in_channels=256, + reg_max=3, + test_cfg=test_cfg, + anchor_generator=anchor_generator) + model.requires_grad_(False) + return model + + def test_focus_forward_ncnn(): backend_type = Backend.NCNN check_backend(backend_type) @@ -196,6 +222,7 @@ def test_l2norm_forward(backend_type): dict( backend_config=dict(type=backend_type.value), onnx_config=dict(input_shape=None))) + seed_everything(1234) feat = torch.rand(1, 16, s, s) model_outputs = [l2norm_neck.forward(feat)] wrapped_model = WrapModel(l2norm_neck, 'forward') @@ -349,6 +376,88 @@ def test_get_bboxes_of_rpn_head(backend_type: Backend): assert rewrite_outputs is not None +@pytest.mark.parametrize('backend_type', [Backend.ONNXRUNTIME]) +def test_get_bboxes_of_gfl_head(backend_type): + check_backend(backend_type) + head = get_gfl_head_model() + head.cpu().eval() + s = 4 + img_metas = [{ + 'scale_factor': np.ones(4), + 'pad_shape': (s, s, 3), + 'img_shape': (s, s, 3) + }] + output_names = ['dets'] + deploy_cfg = mmcv.Config( + dict( + backend_config=dict(type=backend_type.value), + onnx_config=dict(output_names=output_names, input_shape=None), + codebase_config=dict( + type='mmdet', + task='ObjectDetection', + model_type='ncnn_end2end', + post_processing=dict( + score_threshold=0.05, + iou_threshold=0.5, + max_output_boxes_per_class=200, + pre_top_k=5000, + keep_top_k=100, + background_label_id=-1, + )))) + + seed_everything(1234) + cls_score = [ + torch.rand(1, 3, pow(2, i), pow(2, i)) for i in range(5, 0, -1) + ] + seed_everything(5678) + bboxes = [torch.rand(1, 16, pow(2, i), pow(2, i)) for i in range(5, 0, -1)] + + # to get outputs of onnx model after rewrite + img_metas[0]['img_shape'] = torch.Tensor([s, s]) + 
wrapped_model = WrapModel( + head, 'get_bboxes', img_metas=img_metas, with_nms=True) + rewrite_inputs = { + 'cls_scores': cls_score, + 'bbox_preds': bboxes, + } + # do not run with ncnn backend + run_with_backend = False if backend_type in [Backend.NCNN] else True + rewrite_outputs, is_backend_output = get_rewrite_outputs( + wrapped_model=wrapped_model, + model_inputs=rewrite_inputs, + deploy_cfg=deploy_cfg, + run_with_backend=run_with_backend) + assert rewrite_outputs is not None + + +@pytest.mark.parametrize('backend_type', [Backend.ONNXRUNTIME]) +def test_forward_of_gfl_head(backend_type): + check_backend(backend_type) + head = get_gfl_head_model() + head.cpu().eval() + deploy_cfg = mmcv.Config( + dict( + backend_config=dict(type=backend_type.value), + onnx_config=dict(input_shape=None))) + feats = [torch.rand(1, 256, pow(2, i), pow(2, i)) for i in range(5, 0, -1)] + model_outputs = [head.forward(feats)] + wrapped_model = WrapModel(head, 'forward') + rewrite_inputs = { + 'feats': feats, + } + rewrite_outputs, is_backend_output = get_rewrite_outputs( + wrapped_model=wrapped_model, + model_inputs=rewrite_inputs, + deploy_cfg=deploy_cfg) + model_outputs[0] = [*model_outputs[0][0], *model_outputs[0][1]] + for model_output, rewrite_output in zip(model_outputs[0], + rewrite_outputs[0]): + model_output = model_output.squeeze().cpu().numpy() + rewrite_output = rewrite_output.squeeze() + assert np.allclose( + model_output, rewrite_output, rtol=1e-03, atol=1e-05) + + def _replace_r50_with_r18(model): """Replace ResNet50 with ResNet18 in config.""" model = copy.deepcopy(model) @@ -1119,26 +1228,14 @@ def test_get_bboxes_of_vfnet_head(backend_type: Backend): assert rewrite_outputs is not None -@pytest.mark.parametrize('backend_type', - [Backend.ONNXRUNTIME, Backend.OPENVINO]) -def test_base_dense_head_get_bboxes(backend_type: Backend): - """Test get_bboxes rewrite of base dense head.""" - check_backend(backend_type) - anchor_head = get_anchor_head_model() - anchor_head.cpu().eval() - s = 128 - img_metas = [{ - 'scale_factor': np.ones(4), - 'pad_shape': (s, s, 3), - 'img_shape': (s, s, 3) - }] - - output_names = ['dets', 'labels'] - - deploy_cfg = mmcv.Config( +def get_deploy_cfg(backend_type: Backend, ir_type: str): + return mmcv.Config( dict( backend_config=dict(type=backend_type.value), - onnx_config=dict(output_names=output_names, input_shape=None), + onnx_config=dict( + type=ir_type, + output_names=['dets', 'labels'], + input_shape=None), codebase_config=dict( type='mmdet', task='ObjectDetection', @@ -1151,6 +1248,26 @@ def test_base_dense_head_get_bboxes(backend_type: Backend): background_label_id=-1, )))) + +@pytest.mark.parametrize('backend_type, ir_type', + [(Backend.ONNXRUNTIME, 'onnx'), + (Backend.OPENVINO, 'onnx'), + (Backend.TORCHSCRIPT, 'torchscript')]) +def test_base_dense_head_get_bboxes(backend_type: Backend, ir_type: str): + """Test get_bboxes rewrite of base dense head.""" + check_backend(backend_type) + anchor_head = get_anchor_head_model() + anchor_head.cpu().eval() + s = 128 + img_metas = [{ + 'scale_factor': np.ones(4), + 'pad_shape': (s, s, 3), + 'img_shape': (s, s, 3) + }] + + deploy_cfg = get_deploy_cfg(backend_type, ir_type) + output_names = get_ir_config(deploy_cfg).get('output_names', None) + # the cls_score's size: (1, 36, 32, 32), (1, 36, 16, 16), # (1, 36, 8, 8), (1, 36, 4, 4), (1, 36, 2, 2). 
# the bboxes's size: (1, 36, 32, 32), (1, 36, 16, 16), diff --git a/tests/test_core/test_function_rewriter.py b/tests/test_core/test_function_rewriter.py index b9b43fb688..97a814e929 100644 --- a/tests/test_core/test_function_rewriter.py +++ b/tests/test_core/test_function_rewriter.py @@ -3,7 +3,8 @@ from mmdeploy.core import FUNCTION_REWRITER, RewriterContext from mmdeploy.core.rewriters.function_rewriter import FunctionRewriter -from mmdeploy.utils.constants import Backend +from mmdeploy.core.rewriters.rewriter_utils import collect_env +from mmdeploy.utils.constants import IR, Backend def test_function_rewriter(): @@ -97,7 +98,6 @@ def test_rewrite_homonymic_functions(self): assert package.module.func() == 1 function_rewriter = FunctionRewriter() - function_rewriter.add_backend(Backend.NCNN.value) @function_rewriter.register_rewriter(func_name=path1) def func_2(ctx): @@ -108,7 +108,7 @@ def func_2(ctx): def func_3(ctx): return 3 - function_rewriter.enter(backend=Backend.NCNN.value) + function_rewriter.enter(env=collect_env(Backend.NCNN, ir=IR.DEFAULT)) # This is a feature assert package.func() == 2 assert package.module.func() == 3 @@ -118,7 +118,6 @@ def func_3(ctx): assert package.module.func() == 1 function_rewriter2 = FunctionRewriter() - function_rewriter2.add_backend(Backend.NCNN.value) @function_rewriter2.register_rewriter( func_name=path1, backend=Backend.NCNN.value) @@ -129,7 +128,7 @@ def func_4(ctx): def func_5(ctx): return 5 - function_rewriter2.enter(backend=Backend.NCNN.value) + function_rewriter2.enter(env=collect_env(Backend.NCNN, ir=IR.DEFAULT)) # This is a feature assert package.func() == 4 assert package.module.func() == 5 @@ -146,7 +145,6 @@ def test_rewrite_homonymic_methods(self): c = package.C() function_rewriter = FunctionRewriter() - function_rewriter.add_backend(Backend.NCNN.value) assert c.method() == 1 @@ -159,14 +157,13 @@ def func_2(ctx, self): def func_3(ctx, self): return 3 - function_rewriter.enter(backend=Backend.NCNN.value) + function_rewriter.enter(env=collect_env(Backend.NCNN, ir=IR.DEFAULT)) assert c.method() == 3 function_rewriter.exit() assert c.method() == 1 function_rewriter2 = FunctionRewriter() - function_rewriter2.add_backend(Backend.NCNN.value) @function_rewriter2.register_rewriter( func_name=path1, backend=Backend.NCNN.value) @@ -177,7 +174,7 @@ def func_4(ctx, self): def func_5(ctx, self): return 5 - function_rewriter2.enter(backend=Backend.NCNN.value) + function_rewriter2.enter(env=collect_env(Backend.NCNN, ir=IR.DEFAULT)) assert c.method() == 4 function_rewriter2.exit() @@ -196,7 +193,6 @@ def test_rewrite_derived_methods(): assert derived_obj.method() == 1 function_rewriter = FunctionRewriter() - function_rewriter.add_backend(Backend.NCNN.value) @function_rewriter.register_rewriter(func_name=path1) def func_2(ctx, self): @@ -207,12 +203,12 @@ def func_2(ctx, self): def func_3(ctx, self): return 3 - function_rewriter.enter() + function_rewriter.enter(env=collect_env(Backend.DEFAULT, ir=IR.DEFAULT)) assert base_obj.method() == 2 assert derived_obj.method() == 2 function_rewriter.exit() - function_rewriter.enter(backend=Backend.NCNN.value) + function_rewriter.enter(env=collect_env(Backend.NCNN, ir=IR.DEFAULT)) assert base_obj.method() == 2 assert derived_obj.method() == 3 function_rewriter.exit() @@ -221,7 +217,7 @@ def func_3(ctx, self): assert derived_obj.method() == 1 # Check if the recovery is correct - function_rewriter.enter() + function_rewriter.enter(env=collect_env(Backend.DEFAULT, ir=IR.DEFAULT)) assert base_obj.method() == 2 
assert derived_obj.method() == 2 function_rewriter.exit() diff --git a/tests/test_core/test_mark.py b/tests/test_core/test_mark.py index 5c0990f90f..fb85472ee1 100644 --- a/tests/test_core/test_mark.py +++ b/tests/test_core/test_mark.py @@ -4,8 +4,9 @@ import onnx import torch -from mmdeploy.core import mark +from mmdeploy.core import RewriterContext, mark from mmdeploy.core.optimizers import attribute_to_dict +from mmdeploy.utils.constants import IR, Backend output_file = tempfile.NamedTemporaryFile(suffix='.onnx').name @@ -68,3 +69,9 @@ def forward(self, x, y): type='output', name='c', shape=[2, 3, 4]) + + with RewriterContext( + cfg=None, backend=Backend.TORCHSCRIPT.value, + ir=IR.TORCHSCRIPT), torch.no_grad(), torch.jit.optimized_execution( + True): + torch.jit.trace(model, (x, y)) diff --git a/tests/test_core/test_rewriter_registry.py b/tests/test_core/test_rewriter_registry.py deleted file mode 100644 index b577d02623..0000000000 --- a/tests/test_core/test_rewriter_registry.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import pytest - -from mmdeploy.core.rewriters.rewriter_utils import RewriterRegistry -from mmdeploy.utils.constants import Backend - - -def test_check_backend(): - with pytest.raises(Exception): - registry = RewriterRegistry() - registry._check_backend(Backend.ONNXRUNTIME.value) - - -def test_add_backend(): - registry = RewriterRegistry() - registry.add_backend(Backend.ONNXRUNTIME.value) - assert Backend.ONNXRUNTIME.value in registry._rewrite_records - assert Backend.DEFAULT.value in registry._rewrite_records - assert Backend.TENSORRT.value not in registry._rewrite_records - - -def test_register_object(): - registry = RewriterRegistry() - - @registry.register_object('add', backend=Backend.DEFAULT.value) - def add(a, b): - return a + b - - records = registry._rewrite_records[Backend.DEFAULT.value] - assert records is not None - assert records['add'] is not None - assert records['add']['_object'] is not None - add_func = records['add']['_object'] - assert add_func(123, 456) == 123 + 456 - - -def test_get_records(): - registry = RewriterRegistry() - registry.add_backend(Backend.TENSORRT.value) - - @registry.register_object('add', backend=Backend.DEFAULT.value) - def add(a, b): - return a + b - - @registry.register_object('minus', backend=Backend.DEFAULT.value) - def minus(a, b): - return a - b - - @registry.register_object('add', backend=Backend.TENSORRT.value) - def fake_add(a, b): - return a * b - - default_records = dict(registry.get_records(Backend.DEFAULT.value)) - assert default_records['add']['_object'](1, 1) == 2 - assert default_records['minus']['_object'](1, 1) == 0 - - tensorrt_records = dict(registry.get_records(Backend.TENSORRT.value)) - assert tensorrt_records['add']['_object'](1, 1) == 1 - assert tensorrt_records['minus']['_object'](1, 1) == 0 diff --git a/tests/test_core/test_rewriter_utils.py b/tests/test_core/test_rewriter_utils.py new file mode 100644 index 0000000000..4954a573d8 --- /dev/null +++ b/tests/test_core/test_rewriter_utils.py @@ -0,0 +1,112 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
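+# These tests cover the checker-based registry that replaces the deleted +# test_rewriter_registry.py above: collect_env() bundles the backend, IR and +# library versions into one environment dict, and get_records() returns the +# rewrite whose checkers all pass against that environment.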
+import mmdeploy +import mmdeploy.core.rewriters.rewriter_utils as rewriter_utils +from mmdeploy.core.rewriters.rewriter_utils import (BackendChecker, + RewriterRegistry, + collect_env) +from mmdeploy.utils.constants import IR, Backend + + +def test_collect_env(): + env_dict = collect_env(Backend.ONNXRUNTIME, IR.ONNX, version='1.0') + assert env_dict['backend'] == Backend.ONNXRUNTIME + assert env_dict['ir'] == IR.ONNX + assert env_dict['version'] == '1.0' + assert env_dict['mmdeploy'] == mmdeploy.__version__ + + +class TestChecker: + env = collect_env(Backend.ONNXRUNTIME, IR.ONNX) + + def test_backend_checker(self): + true_checker = rewriter_utils.BackendChecker(Backend.ONNXRUNTIME) + assert true_checker.check(self.env) is True + + false_checker = rewriter_utils.BackendChecker(Backend.TENSORRT) + assert false_checker.check(self.env) is False + + def test_ir_checker(self): + true_checker = rewriter_utils.IRChecker(IR.ONNX) + assert true_checker.check(self.env) is True + + false_checker = rewriter_utils.IRChecker(IR.TORCHSCRIPT) + assert false_checker.check(self.env) is False + + def test_lib_version_checker(self): + true_checker = rewriter_utils.LibVersionChecker( + 'mmdeploy', mmdeploy.__version__, mmdeploy.__version__) + assert true_checker.check(self.env) is True + + false_checker = rewriter_utils.LibVersionChecker( + 'mmdeploy', max_version='0.0.0') + assert false_checker.check(self.env) is False + + +def test_register_object(): + registry = RewriterRegistry() + checker = rewriter_utils.BackendChecker(Backend.ONNXRUNTIME) + + @registry.register_object( + 'add', + backend=Backend.DEFAULT.value, + ir=IR.DEFAULT, + extra_checkers=checker) + def add(a, b): + return a + b + + records = registry._rewrite_records + assert records is not None + assert records['add'] is not None + assert isinstance(records['add'][0]['_checkers'], list) + assert isinstance(records['add'][0]['_checkers'][0], BackendChecker) + assert records['add'][0]['_object'] is not None + add_func = records['add'][0]['_object'] + assert add_func(123, 456) == 123 + 456 + + +def test_get_records(): + registry = RewriterRegistry() + + @registry.register_object( + 'get_num', backend=Backend.ONNXRUNTIME.value, ir=IR.ONNX) + def get_num_1(): + return 1 + + @registry.register_object( + 'get_num', backend=Backend.ONNXRUNTIME.value, ir=IR.TORCHSCRIPT) + def get_num_2(): + return 2 + + @registry.register_object( + 'get_num', backend=Backend.TENSORRT.value, ir=IR.ONNX) + def get_num_3(): + return 3 + + @registry.register_object( + 'get_num', backend=Backend.TENSORRT.value, ir=IR.TORCHSCRIPT) + def get_num_4(): + return 4 + + @registry.register_object( + 'get_num', backend=Backend.DEFAULT.value, ir=IR.DEFAULT) + def get_num_5(): + return 5 + + records = dict( + registry.get_records(collect_env(Backend.ONNXRUNTIME, IR.ONNX))) + assert records['get_num']['_object']() == 1 + + records = dict( + registry.get_records(collect_env(Backend.ONNXRUNTIME, IR.TORCHSCRIPT))) + assert records['get_num']['_object']() == 2 + + records = dict( + registry.get_records(collect_env(Backend.TENSORRT, IR.ONNX))) + assert records['get_num']['_object']() == 3 + + records = dict( + registry.get_records(collect_env(Backend.TENSORRT, IR.TORCHSCRIPT))) + assert records['get_num']['_object']() == 4 + + records = dict(registry.get_records(collect_env(Backend.NCNN, IR.ONNX))) + assert records['get_num']['_object']() == 5 diff --git a/tests/test_csrc/CMakeLists.txt b/tests/test_csrc/CMakeLists.txt index 3ff7e2d155..34cc0349dd 100644 --- a/tests/test_csrc/CMakeLists.txt 
+++ b/tests/test_csrc/CMakeLists.txt @@ -14,60 +14,63 @@ aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/net NET_TC) aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/model MODEL_TC) set(DEVICE_TC) -foreach(DEVICE IN LISTS MMDEPLOY_TARGET_DEVICES) - list(APPEND DEVICE_TC - ${CMAKE_CURRENT_SOURCE_DIR}/device/test_${DEVICE}_device.cpp) -endforeach() +foreach (DEVICE IN LISTS MMDEPLOY_TARGET_DEVICES) + list(APPEND DEVICE_TC + ${CMAKE_CURRENT_SOURCE_DIR}/device/test_${DEVICE}_device.cpp) +endforeach () set(CAPI_TC) -if("all" IN_LIST MMDEPLOY_CODEBASES) - set(TASK_LIST - "classifier;detector;segmentor;text_detector;text_recognizer;restorer;model" - ) - set(CODEBASES "mmcls;mmdet;mmseg;mmedit;mmocr") -else() - set(TASK_LIST "model") - set(CODEBASES "${MMDEPLOY_CODEBASES}") - if("mmcls" IN_LIST MMDEPLOY_CODEBASES) - list(APPEND TASK_LIST "classifier") - endif() - if("mmdet" IN_LIST MMDEPLOY_CODEBASES) - list(APPEND TASK_LIST "detector") - endif() - if("mmseg" IN_LIST MMDEPLOY_CODEBASES) - list(APPEND TASK_LIST "segmentor") - endif() - if("mmedit" IN_LIST MMDEPLOY_CODEBASES) - list(APPEND TASK_LIST "restorer") - endif() - if("mmocr" IN_LIST MMDEPLOY_CODEBASES) - list(APPEND TASK_LIST "text_detector") - list(APPEND TASK_LIST "text_recognizer") - endif() -endif() -foreach(TASK ${TASK_LIST}) - list(APPEND CAPI_TC ${CMAKE_CURRENT_SOURCE_DIR}/capi/test_${TASK}.cpp) -endforeach() +if ("all" IN_LIST MMDEPLOY_CODEBASES) + set(TASK_LIST + "classifier;detector;segmentor;text_detector;text_recognizer;restorer;model" + ) + set(CODEBASES "mmcls;mmdet;mmseg;mmedit;mmocr") +else () + set(TASK_LIST "model") + set(CODEBASES "${MMDEPLOY_CODEBASES}") + if ("mmcls" IN_LIST MMDEPLOY_CODEBASES) + list(APPEND TASK_LIST "classifier") + endif () + if ("mmdet" IN_LIST MMDEPLOY_CODEBASES) + list(APPEND TASK_LIST "detector") + endif () + if ("mmseg" IN_LIST MMDEPLOY_CODEBASES) + list(APPEND TASK_LIST "segmentor") + endif () + if ("mmedit" IN_LIST MMDEPLOY_CODEBASES) + list(APPEND TASK_LIST "restorer") + endif () + if ("mmocr" IN_LIST MMDEPLOY_CODEBASES) + list(APPEND TASK_LIST "text_detector") + list(APPEND TASK_LIST "text_recognizer") + endif () +endif () +foreach (TASK ${TASK_LIST}) + list(APPEND CAPI_TC ${CMAKE_CURRENT_SOURCE_DIR}/capi/test_${TASK}.cpp) +endforeach () # generate the header file configure_file(config/test_define.h.in - ${CMAKE_CURRENT_SOURCE_DIR}/test_define.h) + ${CMAKE_CURRENT_SOURCE_DIR}/test_define.h) set(TC_SRCS - ${TC_SRCS} - ${ARCHIVE_TC} - ${CORE_TC} - ${TRANSFORM_TC} - ${MODEL_TC} - ${NET_TC} - ${DEVICE_TC} - ${CAPI_TC}) + ${TC_SRCS} + ${ARCHIVE_TC} + ${CORE_TC} + ${TRANSFORM_TC} + ${MODEL_TC} + ${NET_TC} + ${DEVICE_TC} + ${CAPI_TC}) add_executable(mmdeploy_tests ${TC_SRCS}) target_include_directories(mmdeploy_tests - PRIVATE ${CMAKE_SOURCE_DIR}/third_party/catch2) + PRIVATE ${CMAKE_SOURCE_DIR}/third_party/catch2) target_include_directories(mmdeploy_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) -target_link_libraries( - mmdeploy_tests PRIVATE ${MMDEPLOY_LIBS} ${OpenCV_LIBS} - -Wl,--no-as-needed ${MMDEPLOY_DYNAMIC_MODULES} -Wl,--as-need - -Wl,--whole-archive ${MMDEPLOY_STATIC_MODULES} -Wl,--no-whole-archive) + +mmdeploy_load_static(mmdeploy_tests MMDeployStaticModules) +mmdeploy_load_dynamic(mmdeploy_tests MMDeployDynamicModules) +target_link_libraries(mmdeploy_tests PRIVATE + MMDeployLibs + mmdeploy_transform + mmdeploy_opencv_utils) diff --git a/tests/test_csrc/archive/test_value_archive.cpp b/tests/test_csrc/archive/test_value_archive.cpp index 9e53f12269..f46316e355 100644 --- 
a/tests/test_csrc/archive/test_value_archive.cpp +++ b/tests/test_csrc/archive/test_value_archive.cpp @@ -1,5 +1,12 @@ // Copyright (c) OpenMMLab. All rights reserved. +// clang-format off + +#include "catch.hpp" + +// clang-format on + +#include #include #include #include @@ -8,10 +15,9 @@ #include #include #include -#include "core/utils/formatter.h" #include "archive/value_archive.h" -#include "catch.hpp" +#include "core/utils/formatter.h" // clang-format off @@ -41,8 +47,8 @@ TEMPLATE_LIST_TEST_CASE("test array-like for value", "[value]", ArrayLikeTypes) } TEST_CASE("test native array for value archive", "[value1]") { - const int a[10] = {0,1,2,3,4,5,6,7,8,9}; - int b[10] = {0,0,0,0,0,0,0,0,0,0}; + const int a[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + int b[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; mmdeploy::Value value; mmdeploy::ValueOutputArchive oa(value); oa(a); diff --git a/tests/test_csrc/capi/test_classifier.cpp b/tests/test_csrc/capi/test_classifier.cpp index 5e0b0f4136..1b4ff1bbf0 100644 --- a/tests/test_csrc/capi/test_classifier.cpp +++ b/tests/test_csrc/capi/test_classifier.cpp @@ -33,11 +33,11 @@ TEST_CASE("test classifier's c api", "[classifier]") { ret = mmdeploy_classifier_apply(handle, mats.data(), (int)mats.size(), &results, &result_count); REQUIRE(ret == MM_SUCCESS); auto result_ptr = results; - INFO("model_path: {}", model_path); + MMDEPLOY_INFO("model_path: {}", model_path); for (auto i = 0; i < (int)mats.size(); ++i) { - INFO("the {}-th classification result: ", i); + MMDEPLOY_INFO("the {}-th classification result: ", i); for (int j = 0; j < *result_count; ++j, ++result_ptr) { - INFO("\t label: {}, score: {}", result_ptr->label_id, result_ptr->score); + MMDEPLOY_INFO("\t label: {}, score: {}", result_ptr->label_id, result_ptr->score); } } @@ -46,12 +46,12 @@ TEST_CASE("test classifier's c api", "[classifier]") { }; auto gResources = MMDeployTestResources::Get(); - auto img_lists = gResources.LocateImageResources("mmcls/images"); + auto img_lists = gResources.LocateImageResources(fs::path{"mmcls"} / "images"); REQUIRE(!img_lists.empty()); for (auto& backend : gResources.backends()) { DYNAMIC_SECTION("loop backend: " << backend) { - auto model_list = gResources.LocateModelResources("mmcls/" + backend); + auto model_list = gResources.LocateModelResources(fs::path{"mmcls/"} / backend); REQUIRE(!model_list.empty()); for (auto& model_path : model_list) { for (auto& device_name : gResources.device_names(backend)) { diff --git a/tests/test_csrc/capi/test_detector.cpp b/tests/test_csrc/capi/test_detector.cpp index f7a72e5410..a801d352a1 100644 --- a/tests/test_csrc/capi/test_detector.cpp +++ b/tests/test_csrc/capi/test_detector.cpp @@ -6,12 +6,13 @@ #include "apis/c/detector.h" #include "core/logger.h" +#include "core/utils/formatter.h" #include "opencv2/opencv.hpp" #include "test_resource.h" - using namespace std; TEST_CASE("test detector's c api", "[detector]") { + MMDEPLOY_INFO("test detector"); auto test = [](const string &device, const string &model_path, const vector &img_list) { mm_handle_t handle{nullptr}; auto ret = mmdeploy_detector_create_by_path(model_path.c_str(), device.c_str(), 0, &handle); @@ -32,26 +33,30 @@ TEST_CASE("test detector's c api", "[detector]") { REQUIRE(ret == MM_SUCCESS); auto result_ptr = results; for (auto i = 0; i < mats.size(); ++i) { - INFO("the '{}-th' image has '{}' objects", i, result_count[i]); + MMDEPLOY_INFO("the '{}-th' image has '{}' objects", i, result_count[i]); for (auto j = 0; j < result_count[i]; ++j, ++result_ptr) { auto &bbox = 
result_ptr->bbox; - INFO(" >> bbox[{}, {}, {}, {}], label_id {}, score {}", bbox.left, bbox.top, bbox.right, - bbox.bottom, result_ptr->label_id, result_ptr->score); + MMDEPLOY_INFO(" >> bbox[{}, {}, {}, {}], label_id {}, score {}", bbox.left, bbox.top, + bbox.right, bbox.bottom, result_ptr->label_id, result_ptr->score); } } mmdeploy_detector_release_result(results, result_count, (int)mats.size()); mmdeploy_detector_destroy(handle); }; - - auto gResources = MMDeployTestResources::Get(); - auto img_lists = gResources.LocateImageResources("mmdet/images"); + MMDEPLOY_INFO("get test resources"); + auto &gResources = MMDeployTestResources::Get(); + MMDEPLOY_INFO("locate image resources"); + auto img_lists = gResources.LocateImageResources(fs::path{"mmdet"} / "images"); + MMDEPLOY_INFO("{}", img_lists.size()); REQUIRE(!img_lists.empty()); for (auto &backend : gResources.backends()) { + MMDEPLOY_INFO("backend: {}", backend); DYNAMIC_SECTION("loop backend: " << backend) { - auto model_list = gResources.LocateModelResources("mmdet/" + backend); + auto model_list = gResources.LocateModelResources(fs::path{"mmdet"} / backend); REQUIRE(!model_list.empty()); for (auto &model_path : model_list) { + MMDEPLOY_INFO("model: {}", model_path); for (auto &device_name : gResources.device_names(backend)) { test(device_name, model_path, img_lists); } diff --git a/tests/test_csrc/capi/test_model.cpp b/tests/test_csrc/capi/test_model.cpp index af0a983628..d9bab881f3 100644 --- a/tests/test_csrc/capi/test_model.cpp +++ b/tests/test_csrc/capi/test_model.cpp @@ -12,7 +12,7 @@ TEST_CASE("test model c capi", "[model]") { std::string model_path; for (auto const &codebase : gResource.codebases()) { for (auto const &backend : gResource.backends()) { - if (auto _model_list = gResource.LocateModelResources(codebase + "/" + backend); + if (auto _model_list = gResource.LocateModelResources(fs::path{codebase} / backend); !_model_list.empty()) { model_path = _model_list.front(); break; diff --git a/tests/test_csrc/capi/test_restorer.cpp b/tests/test_csrc/capi/test_restorer.cpp index 502d377021..4e56537174 100644 --- a/tests/test_csrc/capi/test_restorer.cpp +++ b/tests/test_csrc/capi/test_restorer.cpp @@ -40,12 +40,12 @@ TEST_CASE("test restorer's c api", "[restorer]") { }; auto gResources = MMDeployTestResources::Get(); - auto img_lists = gResources.LocateImageResources("mmedit/images"); + auto img_lists = gResources.LocateImageResources(fs::path{"mmedit"} / "images"); REQUIRE(!img_lists.empty()); for (auto &backend : gResources.backends()) { DYNAMIC_SECTION("loop backend: " << backend) { - auto model_list = gResources.LocateModelResources("mmedit/" + backend); + auto model_list = gResources.LocateModelResources(fs::path{"mmedit"} / backend); REQUIRE(!model_list.empty()); for (auto &model_path : model_list) { for (auto &device_name : gResources.device_names(backend)) { diff --git a/tests/test_csrc/capi/test_segmentor.cpp b/tests/test_csrc/capi/test_segmentor.cpp index b042d793c5..6de6150bf1 100644 --- a/tests/test_csrc/capi/test_segmentor.cpp +++ b/tests/test_csrc/capi/test_segmentor.cpp @@ -43,12 +43,12 @@ TEST_CASE("test segmentor's c api", "[segmentor]") { }; auto gResources = MMDeployTestResources::Get(); - auto img_lists = gResources.LocateImageResources("mmseg/images"); + auto img_lists = gResources.LocateImageResources(fs::path{"mmseg"} / "images"); REQUIRE(!img_lists.empty()); for (auto &backend : gResources.backends()) { DYNAMIC_SECTION("loop backend: " << backend) { - auto model_list = 
gResources.LocateModelResources("mmseg/" + backend); + auto model_list = gResources.LocateModelResources(fs::path{"mmseg"} / backend); REQUIRE(!model_list.empty()); for (auto &model_path : model_list) { for (auto &device_name : gResources.device_names(backend)) { diff --git a/tests/test_csrc/capi/test_text_detector.cpp b/tests/test_csrc/capi/test_text_detector.cpp index a2bdd84493..af12d14e09 100644 --- a/tests/test_csrc/capi/test_text_detector.cpp +++ b/tests/test_csrc/capi/test_text_detector.cpp @@ -34,12 +34,12 @@ TEST_CASE("test text detector's c api", "[text-detector]") { auto result_ptr = results; for (auto i = 0; i < mats.size(); ++i) { - INFO("the {}-th image has '{}' objects", i, result_count[i]); + MMDEPLOY_INFO("the {}-th image has '{}' objects", i, result_count[i]); for (auto j = 0; j < result_count[i]; ++j, ++result_ptr) { auto& bbox = result_ptr->bbox; - INFO(">> bbox[{}].score: {}, coordinate: ", i, result_ptr->score); + MMDEPLOY_INFO(">> bbox[{}].score: {}, coordinate: ", i, result_ptr->score); for (auto& _bbox : result_ptr->bbox) { - INFO(">> >> ({}, {})", _bbox.x, _bbox.y); + MMDEPLOY_INFO(">> >> ({}, {})", _bbox.x, _bbox.y); } } } @@ -49,12 +49,12 @@ TEST_CASE("test text detector's c api", "[text-detector]") { }; auto& gResources = MMDeployTestResources::Get(); - auto img_list = gResources.LocateImageResources("mmocr/images"); + auto img_list = gResources.LocateImageResources(fs::path{"mmocr"} / "images"); REQUIRE(!img_list.empty()); for (auto& backend : gResources.backends()) { DYNAMIC_SECTION("loop backend: " << backend) { - auto model_list = gResources.LocateModelResources("mmocr/textdet/" + backend); + auto model_list = gResources.LocateModelResources(fs::path{"mmocr"} / "textdet" / "backend"); REQUIRE(!model_list.empty()); for (auto& model_path : model_list) { for (auto& device_name : gResources.device_names(backend)) { diff --git a/tests/test_csrc/capi/test_text_recognizer.cpp b/tests/test_csrc/capi/test_text_recognizer.cpp index 94f01063dc..3265c4b0ec 100644 --- a/tests/test_csrc/capi/test_text_recognizer.cpp +++ b/tests/test_csrc/capi/test_text_recognizer.cpp @@ -35,7 +35,7 @@ TEST_CASE("test text recognizer's c api", "[text-recognizer]") { for (auto i = 0; i < mats.size(); ++i) { std::vector score(results[i].score, results[i].score + results[i].length); - INFO("image {}, text = {}, score = {}", i, results[i].text, score); + MMDEPLOY_INFO("image {}, text = {}, score = {}", i, results[i].text, score); } mmdeploy_text_recognizer_release_result(results, (int)mats.size()); @@ -43,12 +43,12 @@ TEST_CASE("test text recognizer's c api", "[text-recognizer]") { }; auto& gResources = MMDeployTestResources::Get(); - auto img_list = gResources.LocateImageResources("mmocr/images"); + auto img_list = gResources.LocateImageResources(fs::path{"mmocr"} / "images"); REQUIRE(!img_list.empty()); for (auto& backend : gResources.backends()) { DYNAMIC_SECTION("loop backend: " << backend) { - auto model_list = gResources.LocateModelResources("mmocr/textreg/" + backend); + auto model_list = gResources.LocateModelResources(fs::path{"mmocr"} / "textreg" / "backend"); REQUIRE(!model_list.empty()); for (auto& model_path : model_list) { for (auto& device_name : gResources.device_names(backend)) { @@ -93,7 +93,7 @@ TEST_CASE("test text detector-recognizer combo", "[text-detector-recognizer]") { for (int j = 0; j < bbox_count[i]; ++j) { auto& text = texts[offset + j]; std::vector score(text.score, text.score + text.length); - INFO("image {}, text = {}, score = {}", i, text.text, score); + 
MMDEPLOY_INFO("image {}, text = {}, score = {}", i, text.text, score); } offset += bbox_count[i]; } @@ -106,13 +106,15 @@ TEST_CASE("test text detector-recognizer combo", "[text-detector-recognizer]") { }; auto& gResources = MMDeployTestResources::Get(); - auto img_list = gResources.LocateImageResources("mmocr/images"); + auto img_list = gResources.LocateImageResources(fs::path{"mmocr"} / "images"); REQUIRE(!img_list.empty()); for (auto& backend : gResources.backends()) { DYNAMIC_SECTION("loop backend: " << backend) { - auto det_model_list = gResources.LocateModelResources("/mmocr/textdet/" + backend); - auto reg_model_list = gResources.LocateModelResources("/mmocr/textreg/" + backend); + auto det_model_list = + gResources.LocateModelResources(fs::path{"mmocr"} / "textdet" / backend); + auto reg_model_list = + gResources.LocateModelResources(fs::path{"mmocr"} / "textreg" / backend); REQUIRE(!det_model_list.empty()); REQUIRE(!reg_model_list.empty()); auto det_model_path = det_model_list.front(); diff --git a/tests/test_csrc/core/test_mat.cpp b/tests/test_csrc/core/test_mat.cpp index b1ae27cb35..bb3e1a8842 100644 --- a/tests/test_csrc/core/test_mat.cpp +++ b/tests/test_csrc/core/test_mat.cpp @@ -1,6 +1,8 @@ // Copyright (c) OpenMMLab. All rights reserved. +#include #include +#include #include "catch.hpp" #include "core/logger.h" diff --git a/tests/test_csrc/core/test_status_code.cpp b/tests/test_csrc/core/test_status_code.cpp index 33059862d4..1316a07952 100644 --- a/tests/test_csrc/core/test_status_code.cpp +++ b/tests/test_csrc/core/test_status_code.cpp @@ -26,13 +26,13 @@ TEST_CASE("test status_code", "[status_code]") { sqrt_of_negative().value(); } catch (const Exception& e) { REQUIRE(e.code() == eInvalidArgument); - INFO("{}", e.what()); + MMDEPLOY_INFO("{}", e.what()); } auto r = sqrt_of_negative(); REQUIRE(!r); REQUIRE(r.error() == eInvalidArgument); - INFO("{}", r.error().message().c_str()); + MMDEPLOY_INFO("{}", r.error().message().c_str()); } } // namespace mmdeploy diff --git a/tests/test_csrc/core/test_token.cpp b/tests/test_csrc/core/test_token.cpp deleted file mode 100644 index de2e0f0cb2..0000000000 --- a/tests/test_csrc/core/test_token.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved. 
- -#include - -#include "catch.hpp" -#include "experimental/collection.h" - -namespace token { - -using namespace mmdeploy::token; - -using batch_size = mmdeploy::Token; -using type = mmdeploy::Token; -using name = mmdeploy::Token; - -} // namespace token - -TEST_CASE("test token", "[token]") { - using namespace mmdeploy::token; - using mmdeploy::Collection; - - auto produce = [] { - Collection c; - c << token::batch_size{64} << token::type{"Resize"} << token::name("resize1"); - return c; - }; - - auto c = produce(); - - auto consume = [](token::batch_size b, token::type t) { - std::cout << b.key() << ": " << *b << "\n" << t.key() << ": " << *t << "\n"; - return std::string{"success"}; - }; - - (void)Apply(consume, c); -} diff --git a/tests/test_csrc/core/test_value.cpp b/tests/test_csrc/core/test_value.cpp index 07bfe6d7ff..0ecc1c629b 100644 --- a/tests/test_csrc/core/test_value.cpp +++ b/tests/test_csrc/core/test_value.cpp @@ -219,7 +219,7 @@ TEST_CASE("test pointer of Value", "[value]") { REQUIRE(p["object"].is_object()); REQUIRE(p["array"].is_array()); REQUIRE(p["array"].is_array()); - INFO("{}", p); + MMDEPLOY_INFO("{}", p); } TEST_CASE("test null Value", "[value]") { @@ -332,7 +332,7 @@ TEST_CASE("test speed of value", "[value]") { } auto t1 = std::chrono::high_resolution_clock::now(); auto dt = std::chrono::duration(t1 - t0).count(); - INFO("time = {}ms", (float)dt); + MMDEPLOY_INFO("time = {}ms", (float)dt); } TEST_CASE("test ctor of value", "[value]") { diff --git a/tests/test_csrc/device/test_cpu_device.cpp b/tests/test_csrc/device/test_cpu_device.cpp index 88164014cf..3109f6cd4b 100644 --- a/tests/test_csrc/device/test_cpu_device.cpp +++ b/tests/test_csrc/device/test_cpu_device.cpp @@ -10,93 +10,6 @@ using namespace mmdeploy; using namespace std::string_literals; -namespace mmdeploy { -Kernel CreateCpuKernel(std::function task); -} - -TEST_CASE("basic device", "[device]") { - Platform platform("cpu"); - REQUIRE(platform.GetPlatformName() == "cpu"s); - REQUIRE(platform.GetPlatformId() == 0); - - const Device host("cpu"); - Stream stream(host); - // REQUIRE(platform.CreateStream("cpu", &stream) == 0); - REQUIRE(stream); - - SECTION("basic stream") { - bool set_me{}; - auto kernel = CreateCpuKernel([&] { set_me = true; }); - REQUIRE(kernel); - REQUIRE(stream.Submit(kernel)); - REQUIRE(stream.Wait()); - REQUIRE(set_me); - } - - SECTION("recursive task") { - auto outer_loop = CreateCpuKernel([&] { - for (int i = 0; i < 10; ++i) { - auto inner_loop = CreateCpuKernel([&, i] { - for (int j = 0; j < 10; ++j) { - std::cerr << "(" << i << ", " << j << ") "; - } - std::cerr << "\n"; - }); - REQUIRE(stream.Submit(inner_loop)); - } - }); - REQUIRE(stream.Submit(outer_loop)); - REQUIRE(stream.Wait()); - } - - SECTION("basic event") { - Event event(host); - // REQUIRE(platform.CreateEvent("cpu", &event) == 0); - REQUIRE(event); - auto sleeping = CreateCpuKernel([&] { - std::cerr << "start sleeping\n"; - for (int i = 0; i < 5; ++i) { - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - std::cerr << "0.1 second passed.\n"; - } - std::cerr << "time's up, waking up.\n"; - }); - for (int i = 0; i < 2; ++i) { - REQUIRE(stream.Submit(sleeping)); - REQUIRE(event.Record(stream)); - REQUIRE(event.Wait()); - std::cerr << "waked up.\n"; - } - } - - SECTION("event on stream") { - const int N = 10; - std::vector streams; - streams.reserve(N); - for (int i = 0; i < N; ++i) { - streams.emplace_back(host); - } - std::vector events; - events.reserve(N); - for (int i = 0; i < N; ++i) { - 
events.emplace_back(host); - } - for (int i = 0; i < N; ++i) { - auto kernel = CreateCpuKernel([&, i] { - std::cerr << "greatings from stream " << i << ".\n"; - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - std::cerr << "0.1 second passed, goodbye.\n"; - }); - if (i) { - REQUIRE(streams[i].DependsOn(events[i - 1])); - } - REQUIRE(streams[i].Submit(kernel)); - REQUIRE(events[i].Record(streams[i])); - } - REQUIRE(events.back().Wait()); - } -} - TEST_CASE("test buffer", "[buffer]") { using namespace mmdeploy; Device device{"cpu"}; diff --git a/tests/test_csrc/model/test_directory_model.cpp b/tests/test_csrc/model/test_directory_model.cpp index 6ea1bacc99..50091383ae 100644 --- a/tests/test_csrc/model/test_directory_model.cpp +++ b/tests/test_csrc/model/test_directory_model.cpp @@ -24,14 +24,14 @@ TEST_CASE("test directory model", "[model]") { REQUIRE(!directory_model_list.empty()); auto model_dir = "sdk_models/good_model"; REQUIRE(gResource.IsDir(model_dir)); - auto model_path = gResource.resource_root_path() + "/" + model_dir; - REQUIRE(!model_impl->Init(model_path).has_error()); + auto model_path = gResource.resource_root_path() / model_dir; + REQUIRE(!model_impl->Init(model_path.string()).has_error()); REQUIRE(!model_impl->ReadFile("deploy.json").has_error()); REQUIRE(model_impl->ReadFile("not-existing-file").has_error()); model_dir = "sdk_models/bad_model"; REQUIRE(gResource.IsDir(model_dir)); - model_path = gResource.resource_root_path() + "/" + model_dir; - REQUIRE(!model_impl->Init(model_path).has_error()); + model_path = gResource.resource_root_path() / model_dir; + REQUIRE(!model_impl->Init(model_path.string()).has_error()); REQUIRE(model_impl->ReadMeta().has_error()); } diff --git a/tests/test_csrc/model/test_model.cpp b/tests/test_csrc/model/test_model.cpp index b00f8c2b5f..34bd4c9841 100644 --- a/tests/test_csrc/model/test_model.cpp +++ b/tests/test_csrc/model/test_model.cpp @@ -24,7 +24,8 @@ TEST_CASE("model constructor", "[model]") { TEST_CASE("model init", "[model]") { auto& gResource = MMDeployTestResources::Get(); for (auto& codebase : gResource.codebases()) { - if (auto img_list = gResource.LocateImageResources(codebase + "/images"); !img_list.empty()) { + if (auto img_list = gResource.LocateImageResources(fs::path{codebase} / "images"); + !img_list.empty()) { Model model; REQUIRE(model.Init(img_list.front()).has_error()); break; @@ -32,7 +33,7 @@ TEST_CASE("model init", "[model]") { } for (auto& codebase : gResource.codebases()) { for (auto& backend : gResource.backends()) { - if (auto model_list = gResource.LocateModelResources(codebase + "/" + backend); + if (auto model_list = gResource.LocateModelResources(fs::path{codebase} / backend); !model_list.empty()) { Model model; REQUIRE(!model.Init(model_list.front()).has_error()); diff --git a/tests/test_csrc/model/test_zip_model.cpp b/tests/test_csrc/model/test_zip_model.cpp index 48f787bdea..8d5cb9ca76 100644 --- a/tests/test_csrc/model/test_zip_model.cpp +++ b/tests/test_csrc/model/test_zip_model.cpp @@ -25,10 +25,10 @@ TEST_CASE("test zip model", "[zip_model]") { auto& gResource = MMDeployTestResources::Get(); SECTION("bad sdk model") { - auto zip_model_path = "sdk_models/not_zip_file"; + auto zip_model_path = fs::path{"sdk_models"} / "not_zip_file"; REQUIRE(gResource.IsFile(zip_model_path)); - auto model_path = gResource.resource_root_path() + "/" + zip_model_path; - REQUIRE(model_impl->Init(model_path).has_error()); + auto model_path = gResource.resource_root_path() / zip_model_path; + 
REQUIRE(model_impl->Init(model_path.string()).has_error()); } SECTION("bad zip buffer") { std::vector buffer(100); @@ -36,10 +36,10 @@ TEST_CASE("test zip model", "[zip_model]") { } SECTION("good sdk model") { - auto zip_model_path = "sdk_models/good_model.zip"; + auto zip_model_path = fs::path{"sdk_models"} / "good_model.zip"; REQUIRE(gResource.IsFile(zip_model_path)); - auto model_path = gResource.resource_root_path() + "/" + zip_model_path; - REQUIRE(!model_impl->Init(model_path).has_error()); + auto model_path = gResource.resource_root_path() / zip_model_path; + REQUIRE(!model_impl->Init(model_path.string()).has_error()); REQUIRE(!model_impl->ReadFile("deploy.json").has_error()); REQUIRE(model_impl->ReadFile("not-exist-file").has_error()); REQUIRE(!model_impl->ReadMeta().has_error()); diff --git a/tests/test_csrc/net/test_ncnn_net.cpp b/tests/test_csrc/net/test_ncnn_net.cpp index 98b348c19b..b55280c041 100644 --- a/tests/test_csrc/net/test_ncnn_net.cpp +++ b/tests/test_csrc/net/test_ncnn_net.cpp @@ -12,7 +12,7 @@ using namespace mmdeploy; TEST_CASE("test ncnn net", "[ncnn_net]") { auto& gResource = MMDeployTestResources::Get(); - auto model_list = gResource.LocateModelResources("mmcls/ncnn"); + auto model_list = gResource.LocateModelResources(fs::path{"mmcls"} / "ncnn"); REQUIRE(!model_list.empty()); Model model(model_list.front()); diff --git a/tests/test_csrc/net/test_openvino_net.cpp b/tests/test_csrc/net/test_openvino_net.cpp index f4a2f683f3..c3f82eb61e 100644 --- a/tests/test_csrc/net/test_openvino_net.cpp +++ b/tests/test_csrc/net/test_openvino_net.cpp @@ -12,7 +12,7 @@ using namespace mmdeploy; TEST_CASE("test openvino net", "[openvino_net]") { auto& gResource = MMDeployTestResources::Get(); - auto model_list = gResource.LocateModelResources("mmcls/openvino"); + auto model_list = gResource.LocateModelResources(fs::path{"mmcls"} / "openvino"); REQUIRE(!model_list.empty()); Model model(model_list.front()); diff --git a/tests/test_csrc/net/test_ort_net.cpp b/tests/test_csrc/net/test_ort_net.cpp index 506fbaf199..1162210009 100644 --- a/tests/test_csrc/net/test_ort_net.cpp +++ b/tests/test_csrc/net/test_ort_net.cpp @@ -12,7 +12,7 @@ using namespace mmdeploy; TEST_CASE("test ort net", "[ort_net]") { auto& gResource = MMDeployTestResources::Get(); - auto model_list = gResource.LocateModelResources("mmcls/ort"); + auto model_list = gResource.LocateModelResources(fs::path{"mmcls"} / "ort"); REQUIRE(!model_list.empty()); Model model(model_list.front()); diff --git a/tests/test_csrc/net/test_ppl_net.cpp b/tests/test_csrc/net/test_ppl_net.cpp index 64a6a478a1..b5d34a8ab5 100644 --- a/tests/test_csrc/net/test_ppl_net.cpp +++ b/tests/test_csrc/net/test_ppl_net.cpp @@ -12,7 +12,7 @@ using namespace mmdeploy; TEST_CASE("test pplnn net", "[ppl_net]") { auto& gResource = MMDeployTestResources::Get(); - auto model_list = gResource.LocateModelResources("mmcls/pplnn"); + auto model_list = gResource.LocateModelResources(fs::path{"mmcls"} / "pplnn"); REQUIRE(!model_list.empty()); Model model(model_list.front()); diff --git a/tests/test_csrc/net/test_trt_net.cpp b/tests/test_csrc/net/test_trt_net.cpp index 2b2841d772..c1c579b2c4 100644 --- a/tests/test_csrc/net/test_trt_net.cpp +++ b/tests/test_csrc/net/test_trt_net.cpp @@ -12,7 +12,7 @@ using namespace mmdeploy; TEST_CASE("test trt net", "[trt_net]") { auto& gResource = MMDeployTestResources::Get(); - auto model_list = gResource.LocateModelResources("mmcls/trt"); + auto model_list = gResource.LocateModelResources(fs::path{"mmcls"} / "trt"); 
REQUIRE(!model_list.empty()); Model model(model_list.front()); diff --git a/tests/test_csrc/preprocess/test_compose.cpp b/tests/test_csrc/preprocess/test_compose.cpp index 9b7cd4d8d1..97e8ea452d 100644 --- a/tests/test_csrc/preprocess/test_compose.cpp +++ b/tests/test_csrc/preprocess/test_compose.cpp @@ -11,7 +11,7 @@ #include "core/registry.h" #include "core/utils/formatter.h" #include "json.hpp" -#include "preprocess/cpu/opencv_utils.h" +#include "opencv_utils.h" #include "test_resource.h" #include "test_utils.h" diff --git a/tests/test_csrc/preprocess/test_crop.cpp b/tests/test_csrc/preprocess/test_crop.cpp index 836e527260..b5958b4218 100644 --- a/tests/test_csrc/preprocess/test_crop.cpp +++ b/tests/test_csrc/preprocess/test_crop.cpp @@ -4,7 +4,7 @@ #include "catch.hpp" #include "core/mat.h" #include "core/utils/device_utils.h" -#include "preprocess/cpu/opencv_utils.h" +#include "opencv_utils.h" #include "preprocess/transform/transform.h" #include "test_resource.h" #include "test_utils.h" diff --git a/tests/test_csrc/preprocess/test_image2tensor.cpp b/tests/test_csrc/preprocess/test_image2tensor.cpp index 13de3e3414..16939a09b2 100644 --- a/tests/test_csrc/preprocess/test_image2tensor.cpp +++ b/tests/test_csrc/preprocess/test_image2tensor.cpp @@ -2,7 +2,7 @@ #include "catch.hpp" #include "core/tensor.h" #include "core/utils/device_utils.h" -#include "preprocess/cpu/opencv_utils.h" +#include "opencv_utils.h" #include "preprocess/transform/transform.h" #include "test_resource.h" #include "test_utils.h" diff --git a/tests/test_csrc/preprocess/test_load.cpp b/tests/test_csrc/preprocess/test_load.cpp index fa7ef5867b..47abe91b3b 100644 --- a/tests/test_csrc/preprocess/test_load.cpp +++ b/tests/test_csrc/preprocess/test_load.cpp @@ -4,7 +4,7 @@ #include "core/mat.h" #include "core/tensor.h" #include "core/utils/device_utils.h" -#include "preprocess/cpu/opencv_utils.h" +#include "opencv_utils.h" #include "preprocess/transform/transform.h" #include "test_resource.h" #include "test_utils.h" diff --git a/tests/test_csrc/preprocess/test_normalize.cpp b/tests/test_csrc/preprocess/test_normalize.cpp index 647203c02c..bf96f55b23 100644 --- a/tests/test_csrc/preprocess/test_normalize.cpp +++ b/tests/test_csrc/preprocess/test_normalize.cpp @@ -3,7 +3,7 @@ #include "catch.hpp" #include "core/mat.h" #include "core/utils/device_utils.h" -#include "preprocess/cpu/opencv_utils.h" +#include "opencv_utils.h" #include "preprocess/transform/transform.h" #include "test_resource.h" #include "test_utils.h" diff --git a/tests/test_csrc/preprocess/test_pad.cpp b/tests/test_csrc/preprocess/test_pad.cpp index 3f1608b3b2..338be4bbaf 100644 --- a/tests/test_csrc/preprocess/test_pad.cpp +++ b/tests/test_csrc/preprocess/test_pad.cpp @@ -3,7 +3,7 @@ #include "catch.hpp" #include "core/mat.h" #include "core/utils/device_utils.h" -#include "preprocess/cpu/opencv_utils.h" +#include "opencv_utils.h" #include "preprocess/transform/transform.h" #include "test_resource.h" #include "test_utils.h" diff --git a/tests/test_csrc/preprocess/test_resize.cpp b/tests/test_csrc/preprocess/test_resize.cpp index 8c63d5a19f..e5143f3091 100644 --- a/tests/test_csrc/preprocess/test_resize.cpp +++ b/tests/test_csrc/preprocess/test_resize.cpp @@ -3,7 +3,7 @@ #include "catch.hpp" #include "core/mat.h" #include "core/utils/device_utils.h" -#include "preprocess/cpu/opencv_utils.h" +#include "opencv_utils.h" #include "preprocess/transform/transform.h" #include "test_resource.h" #include "test_utils.h" diff --git a/tests/test_csrc/test_resource.h 
b/tests/test_csrc/test_resource.h index 11fbd034e2..f59c79bf52 100644 --- a/tests/test_csrc/test_resource.h +++ b/tests/test_csrc/test_resource.h @@ -5,21 +5,13 @@ #include #include #include +#include #include #include +#include "core/utils/filesystem.h" #include "test_define.h" -#if __GNUC__ >= 8 -#include -namespace fs = std::filesystem; -#else - -#include - -namespace fs = std::experimental::filesystem; -#endif - using namespace std; class MMDeployTestResources { @@ -35,51 +27,51 @@ class MMDeployTestResources { } const std::vector &backends() const { return backends_; } const std::vector &codebases() const { return codebases_; } - const std::string &resource_root_path() const { return resource_root_path_; } + const fs::path &resource_root_path() const { return resource_root_path_; } bool HasDevice(const std::string &name) const { return std::any_of(devices_.begin(), devices_.end(), [&](const std::string &device_name) { return device_name == name; }); } - bool IsDir(const std::string &dir_name) const { - fs::path path{resource_root_path_ + "/" + dir_name}; + bool IsDir(const fs::path &dir_name) const { + auto path = resource_root_path_ / dir_name; return fs::is_directory(path); } - bool IsFile(const std::string &file_name) const { - fs::path path{resource_root_path_ + "/" + file_name}; + bool IsFile(const fs::path &file_name) const { + auto path = resource_root_path_ / file_name; return fs::is_regular_file(path); } public: - std::vector LocateModelResources(const std::string &sdk_model_zoo_dir) { + std::vector LocateModelResources(const fs::path &sdk_model_zoo_dir) { std::vector sdk_model_list; if (resource_root_path_.empty()) { return sdk_model_list; } - fs::path path{resource_root_path_ + "/" + sdk_model_zoo_dir}; + auto path = resource_root_path_ / sdk_model_zoo_dir; if (!fs::is_directory(path)) { return sdk_model_list; } for (auto const &dir_entry : fs::directory_iterator{path}) { fs::directory_entry entry{dir_entry.path()}; if (auto const &_path = dir_entry.path(); fs::is_directory(_path)) { - sdk_model_list.push_back(dir_entry.path()); + sdk_model_list.push_back(dir_entry.path().string()); } } return sdk_model_list; } - std::vector LocateImageResources(const std::string &img_dir) { + std::vector LocateImageResources(const fs::path &img_dir) { std::vector img_list; if (resource_root_path_.empty()) { return img_list; } - fs::path path{resource_root_path_ + "/" + img_dir}; + auto path = resource_root_path_ / img_dir; if (!fs::is_directory(path)) { return img_list; } @@ -122,15 +114,17 @@ class MMDeployTestResources { return result; } - std::string LocateResourceRootPath(const fs::path &cur_path, int max_depth) { + fs::path LocateResourceRootPath(const fs::path &cur_path, int max_depth) { if (max_depth < 0) { return ""; } for (auto const &dir_entry : fs::directory_iterator{cur_path}) { fs::directory_entry entry{dir_entry.path()}; auto const &_path = dir_entry.path(); - if (fs::is_directory(_path) && _path.filename() == "mmdeploy_test_resources") { - return _path.string(); + // filename must be checked before fs::is_directory, the latter will throw + // when _path points to a system file on Windows + if (_path.filename() == "mmdeploy_test_resources" && fs::is_directory(_path)) { + return _path; } } // Didn't find 'mmdeploy_test_resources' in current directory. 
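For reference, the depth-limited lookup that LocateResourceRootPath implements maps onto the following minimal Python sketch. This is an illustration only, not part of the patch; the step up to the parent directory is implied by the comment above but not shown in this hunk:

```python
from pathlib import Path

def locate_resource_root(cur_path: Path, max_depth: int) -> Path:
    """Depth-limited search for the 'mmdeploy_test_resources' directory."""
    if max_depth < 0:
        return Path()  # search budget exhausted, mirroring the C++ `return "";`
    for entry in cur_path.iterdir():
        # Compare the name before calling is_dir(), mirroring the Windows
        # workaround described in the C++ comment above.
        if entry.name == 'mmdeploy_test_resources' and entry.is_dir():
            return entry
    # Not found at this level: retry one directory up with less budget.
    return locate_resource_root(cur_path.parent, max_depth - 1)
```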
@@ -143,7 +137,7 @@ class MMDeployTestResources { std::vector backends_; std::vector codebases_; std::map> backend_devices_; - std::string resource_root_path_; + fs::path resource_root_path_; }; #endif // MMDEPLOY_TEST_RESOURCE_H diff --git a/tests/test_ops/test_ops.py b/tests/test_ops/test_ops.py index eeb86425fd..60143d878f 100644 --- a/tests/test_ops/test_ops.py +++ b/tests/test_ops/test_ops.py @@ -105,12 +105,14 @@ def wrapped_function(inputs, grid): @pytest.mark.parametrize('dynamic_export', [True, False]) @pytest.mark.parametrize('mode', ['bicubic', 'nearest']) @pytest.mark.parametrize('align_corners', [True, False]) -@pytest.mark.parametrize('scale_factor', [2, 4]) +@pytest.mark.parametrize('output_size', [[10, 20], None]) +@pytest.mark.parametrize('scale_factor', [2]) @pytest.mark.parametrize('n, c, h, w', [(2, 3, 5, 10)]) def test_bicubic_interpolate(backend, dynamic_export, mode, align_corners, + output_size, scale_factor, n, c, @@ -140,8 +142,12 @@ def test_bicubic_interpolate(backend, if mode == 'nearest': align_corners = None - resize = nn.Upsample( - scale_factor=scale_factor, mode=mode, align_corners=align_corners) + if output_size is None: + resize = nn.Upsample( + scale_factor=scale_factor, mode=mode, align_corners=align_corners) + else: + resize = nn.Upsample( + size=output_size, mode=mode, align_corners=align_corners) expected_result = resize(input).cuda() wrapped_model = WrapFunction(resize).eval() diff --git a/tests/test_pytorch/test_pytorch_ops.py b/tests/test_pytorch/test_pytorch_ops.py index 69c9e12ed7..9a03148817 100644 --- a/tests/test_pytorch/test_pytorch_ops.py +++ b/tests/test_pytorch/test_pytorch_ops.py @@ -116,3 +116,10 @@ def test_squeeze(self): nodes = get_model_onnx_nodes(model, x) assert nodes[0].attribute[0].ints == [0] assert nodes[0].op_type == 'Squeeze' + + +def test_hardsigmoid(): + x = torch.rand(1, 2, 3, 4) + model = torch.nn.Hardsigmoid().eval() + nodes = get_model_onnx_nodes(model, x) + assert nodes[0].op_type == 'HardSigmoid' diff --git a/tests/test_utils/test_util.py b/tests/test_utils/test_util.py index e9f5ad33c2..d4e6764eec 100644 --- a/tests/test_utils/test_util.py +++ b/tests/test_utils/test_util.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved.
diff --git a/tests/test_utils/test_util.py b/tests/test_utils/test_util.py
index e9f5ad33c2..d4e6764eec 100644
--- a/tests/test_utils/test_util.py
+++ b/tests/test_utils/test_util.py
@@ -1,4 +1,5 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+import importlib
 import logging
 import os
 import tempfile
@@ -144,7 +145,8 @@ def test_get_onnx_config(self):


 class TestIsDynamic:
-    config_with_onnx_config = mmcv.Config(dict(onnx_config=dict()))
+    config_with_onnx_config = mmcv.Config(
+        dict(onnx_config=dict(), backend_config=dict(type='default')))

     config_with_dynamic_axes = mmcv.Config(
         dict(
@@ -154,7 +156,8 @@ class TestIsDynamic:
                     0: 'batch',
                     2: 'height',
                     3: 'width'
-                }})))
+                }}),
+            backend_config=dict(type='default')))

     config_with_dynamic_axes_and_input_names = mmcv.Config(
         dict(
@@ -165,12 +168,14 @@ class TestIsDynamic:
                     0: 'batch',
                     2: 'height',
                     3: 'width'
-                }})))
+                }}),
+            backend_config=dict(type='default')))

     config_with_dynamic_axes_list = mmcv.Config(
         dict(
             onnx_config=dict(
-                type='onnx', input_names=['image'], dynamic_axes=[[0, 2, 3]])))
+                type='onnx', input_names=['image'], dynamic_axes=[[0, 2, 3]]),
+            backend_config=dict(type='default')))

     def test_is_dynamic_batch_none(self):
         assert util.is_dynamic_batch(
@@ -440,3 +445,25 @@ def test_get_root_logger():
     from mmdeploy.utils import get_root_logger
     logger = get_root_logger()
     logger.info('This is a test message')
+
+
+def test_get_library_version():
+    assert util.get_library_version('abcdefg') is None
+    try:
+        lib = importlib.import_module('setuptools')
+    except ImportError:
+        pass
+    else:
+        assert util.get_library_version('setuptools') == lib.__version__
+
+
+def test_get_codebase_version():
+    versions = util.get_codebase_version()
+    for k, v in versions.items():
+        assert v == util.get_library_version(k)
+
+
+def test_get_backend_version():
+    versions = util.get_backend_version()
+    for k, v in versions.items():
+        assert v == util.get_library_version(k)
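The three new tests pin down the contract of the version helpers: `get_library_version(name)` returns the package's `__version__` string, or `None` when the import fails, and the two aggregate helpers must agree with it. A sketch of an implementation that satisfies exactly these assertions (hypothetical; MMDeploy's shipped code may differ in detail):

```python
import importlib
from typing import Optional


def get_library_version(lib: str) -> Optional[str]:
    """Return ``lib``'s ``__version__`` if it is importable, else None."""
    try:
        module = importlib.import_module(lib)
    except ImportError:
        return None
    return getattr(module, '__version__', None)
```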
diff --git a/tools/check_env.py b/tools/check_env.py
index 68aa2799e7..3718db1bd5 100644
--- a/tools/check_env.py
+++ b/tools/check_env.py
@@ -4,49 +4,36 @@
 from mmcv.utils import get_git_hash

 import mmdeploy
-from mmdeploy.utils import get_root_logger
+from mmdeploy.utils import (get_backend_version, get_codebase_version,
+                            get_root_logger)


 def collect_env():
     """Collect the information of the running environments."""
     env_info = collect_base_env()
-    env_info['MMDeployment'] = f'{mmdeploy.__version__}+{get_git_hash()[:7]}'
+    env_info['MMDeploy'] = f'{mmdeploy.__version__}+{get_git_hash()[:7]}'
     return env_info


 def check_backend():
-    try:
-        import onnxruntime as ort
-    except ImportError:
-        ort_version = None
-    else:
-        ort_version = ort.__version__
+    backend_versions = get_backend_version()
+    ort_version = backend_versions['onnxruntime']
+    trt_version = backend_versions['tensorrt']
+    ncnn_version = backend_versions['ncnn']
+
     import mmdeploy.apis.onnxruntime as ort_apis
     logger = get_root_logger()
-    logger.info(f'onnxruntime: {ort_version} ops_is_avaliable : '
+    logger.info(f'onnxruntime: {ort_version}\tops_is_available: '
                 f'{ort_apis.is_available()}')

-    try:
-        import tensorrt as trt
-    except ImportError:
-        trt_version = None
-    else:
-        trt_version = trt.__version__
     import mmdeploy.apis.tensorrt as trt_apis
-    logger.info(
-        f'tensorrt: {trt_version} ops_is_avaliable : {trt_apis.is_available()}'
-    )
-
-    try:
-        import ncnn
-    except ImportError:
-        ncnn_version = None
-    else:
-        ncnn_version = ncnn.__version__
+    logger.info(f'tensorrt: {trt_version}\tops_is_available: '
+                f'{trt_apis.is_available()}')
+
     import mmdeploy.apis.ncnn as ncnn_apis
     logger.info(
-        f'ncnn: {ncnn_version} ops_is_avaliable : {ncnn_apis.is_available()}')
+        f'ncnn: {ncnn_version}\tops_is_available: {ncnn_apis.is_available()}')

     import mmdeploy.apis.pplnn as pplnn_apis
     logger.info(f'pplnn_is_avaliable: {pplnn_apis.is_available()}')
@@ -56,45 +43,9 @@ def check_backend():


 def check_codebase():
-    try:
-        import mmcls
-    except ImportError:
-        mmcls_version = None
-    else:
-        mmcls_version = mmcls.__version__
-    logger.info(f'mmcls: {mmcls_version}')
-
-    try:
-        import mmdet
-    except ImportError:
-        mmdet_version = None
-    else:
-        mmdet_version = mmdet.__version__
-    logger.info(f'mmdet: {mmdet_version}')
-
-    try:
-        import mmedit
-    except ImportError:
-        mmedit_version = None
-    else:
-        mmedit_version = mmedit.__version__
-    logger.info(f'mmedit: {mmedit_version}')
-
-    try:
-        import mmocr
-    except ImportError:
-        mmocr_version = None
-    else:
-        mmocr_version = mmocr.__version__
-    logger.info(f'mmocr: {mmocr_version}')
-
-    try:
-        import mmseg
-    except ImportError:
-        mmseg_version = None
-    else:
-        mmseg_version = mmseg.__version__
-    logger.info(f'mmseg: {mmseg_version}')
+    codebase_versions = get_codebase_version()
+    for k, v in codebase_versions.items():
+        logger.info(f'{k}:\t{v}')


 if __name__ == '__main__':
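`check_backend` indexes the result of `get_backend_version()` by `'onnxruntime'`, `'tensorrt'`, and `'ncnn'`, and the deleted `check_codebase` body shows which codebases were previously probed one by one. A plausible sketch of the two aggregate helpers, built on the `get_library_version` sketch above (the name lists are inferred from the surrounding diff; the real helpers may derive them from enums):

```python
# Hypothetical name lists inferred from the surrounding diff.
CODEBASES = ['mmcls', 'mmdet', 'mmedit', 'mmocr', 'mmseg']
BACKENDS = ['onnxruntime', 'tensorrt', 'ncnn']


def get_codebase_version():
    """Map each supported codebase to its installed version (or None)."""
    return {name: get_library_version(name) for name in CODEBASES}


def get_backend_version():
    """Map each probed backend to its installed version (or None)."""
    return {name: get_library_version(name) for name in BACKENDS}
```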
+ """ + if ir_type == IR.ONNX: + return torch2onnx + elif ir_type == IR.TORCHSCRIPT: + return torch2torchscript + else: + raise KeyError(f'Unexpected IR type {ir_type}') + + def main(): args = parse_args() set_start_method('spawn') @@ -87,18 +102,20 @@ def main(): ret_value = mp.Value('d', 0, lock=False) - # convert onnx - onnx_save_file = get_onnx_config(deploy_cfg)['save_file'] + # convert to IR + ir_config = get_ir_config(deploy_cfg) + ir_save_file = ir_config['save_file'] + ir_type = IR.get(ir_config['type']) create_process( - 'torch2onnx', - target=torch2onnx, - args=(args.img, args.work_dir, onnx_save_file, deploy_cfg_path, + f'torch2{ir_type.value}', + target=torch2ir(ir_type), + args=(args.img, args.work_dir, ir_save_file, deploy_cfg_path, model_cfg_path, checkpoint_path), kwargs=dict(device=args.device), ret_value=ret_value) # convert backend - onnx_files = [osp.join(args.work_dir, onnx_save_file)] + ir_files = [osp.join(args.work_dir, ir_save_file)] # partition model partition_cfgs = get_partition_config(deploy_cfg) @@ -112,8 +129,8 @@ def main(): partition_cfgs = get_predefined_partition_cfg( deploy_cfg, partition_cfgs['type']) - origin_onnx_file = onnx_files[0] - onnx_files = [] + origin_ir_file = ir_files[0] + ir_files = [] for partition_cfg in partition_cfgs: save_file = partition_cfg['save_file'] save_path = osp.join(args.work_dir, save_file) @@ -124,11 +141,11 @@ def main(): create_process( f'partition model {save_file} with start: {start}, end: {end}', extract_model, - args=(origin_onnx_file, start, end), + args=(origin_ir_file, start, end), kwargs=dict(dynamic_axes=dynamic_axes, save_file=save_path), ret_value=ret_value) - onnx_files.append(save_path) + ir_files.append(save_path) # calib data calib_filename = get_calib_filename(deploy_cfg) @@ -146,12 +163,12 @@ def main(): device=args.device), ret_value=ret_value) - backend_files = onnx_files + backend_files = ir_files # convert backend backend = get_backend(deploy_cfg) if backend == Backend.TENSORRT: model_params = get_model_inputs(deploy_cfg) - assert len(model_params) == len(onnx_files) + assert len(model_params) == len(ir_files) from mmdeploy.apis.tensorrt import is_available as trt_is_available from mmdeploy.apis.tensorrt import onnx2tensorrt @@ -160,7 +177,7 @@ def main(): + ' please install TensorRT and build TensorRT custom ops first.' 
diff --git a/tools/onnx2ncnn.py b/tools/onnx2ncnn.py
index 6f2bb6190b..0bddd6e036 100644
--- a/tools/onnx2ncnn.py
+++ b/tools/onnx2ncnn.py
@@ -1,3 +1,4 @@
+# Copyright (c) OpenMMLab. All rights reserved.
 import argparse
 import logging