Skip to content

Commit

Permalink
[runtime/xpu] Support the execution of non-streaming parsing on the K…
Browse files Browse the repository at this point in the history
…unlun XPU card #1455
  • Loading branch information
panhehe committed Oct 26, 2022
1 parent 89e8d0d commit 7fb4fea
Show file tree
Hide file tree
Showing 28 changed files with 3,404 additions and 6 deletions.
37 changes: 37 additions & 0 deletions runtime/core/cmake/xpu.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# ANSI escape sequences for coloured console messages. This file defines them
# only on non-Windows hosts; where they are not defined, references expand to
# an empty string.
if(NOT WIN32)
  string(ASCII 27 Esc)
  set(ColourReset "${Esc}[m")
  set(ColourBold "${Esc}[1m")

  # Foreground codes run 31..37 in the order listed below; every colour also
  # gets a Bold<Name> variant that uses the "1;<code>" form.
  set(_sgr_code 31)
  foreach(_colour_name Red Green Yellow Blue Magenta Cyan White)
    set(${_colour_name} "${Esc}[${_sgr_code}m")
    set(Bold${_colour_name} "${Esc}[1;${_sgr_code}m")
    math(EXPR _sgr_code "${_sgr_code} + 1")
  endforeach()

  # Do not leak the loop helpers into the including scope.
  unset(_sgr_code)
  unset(_colour_name)
endif()

# Kunlun XPU backend configuration; active only when the XPU option is on.
#
# Inputs:
#   XPU                - enables this block.
#   ENV{XPU_API_PATH}  - root of the XPU API package, read at configure time.
# Effects (directory scope):
#   sets RUNTIME_XPU_PATH, XPU_KUNLUN_PATH and XPU_API_PATH, adds include and
#   link search paths, and adds the USE_XPU compile definition.
if(XPU)
  # When this module is loaded through include(), CMAKE_CURRENT_SOURCE_DIR is
  # the directory of the including CMakeLists.txt, not of this file.
  set(RUNTIME_XPU_PATH ${CMAKE_CURRENT_SOURCE_DIR})
  message(STATUS "RUNTIME_XPU_PATH is ${RUNTIME_XPU_PATH} .\n")
  set(XPU_KUNLUN_PATH ${RUNTIME_XPU_PATH}/xpu/)

  # An exported-but-empty XPU_API_PATH still counts as DEFINED and would
  # silently produce search paths such as "/output/include"; reject it too.
  if(NOT DEFINED ENV{XPU_API_PATH} OR "$ENV{XPU_API_PATH}" STREQUAL "")
    message(FATAL_ERROR "${BoldRed}NO ENV{XPU_API_PATH} in your env. Please set XPU_API_PATH.${ColourReset}\n")
  endif()
  set(XPU_API_PATH "$ENV{XPU_API_PATH}")
  message("set XPU_API_PATH from env_var. Val is $ENV{XPU_API_PATH}.")

  # Each path is quoted so that a location containing ';' stays one argument.
  include_directories(
    "${XPU_KUNLUN_PATH}/"
    "${XPU_API_PATH}/output/include"
    "${XPU_API_PATH}/../runtime/include")
  link_directories(
    "${XPU_API_PATH}/output/so/"
    "${XPU_API_PATH}/../runtime/output/so/")

  add_definitions(-DUSE_XPU)
endif()
19 changes: 16 additions & 3 deletions runtime/core/decoder/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ set(decoder_srcs
ctc_endpoint.cc
)

if(NOT TORCH AND NOT ONNX)
message(FATAL_ERROR "Please build with TORCH or ONNX!!!")
if(NOT TORCH AND NOT ONNX AND NOT XPU)
message(FATAL_ERROR "Please build with TORCH or ONNX or XPU!!!")
endif()
if(TORCH)
list(APPEND decoder_srcs torch_asr_model.cc)
Expand All @@ -17,8 +17,21 @@ if(ONNX)
list(APPEND decoder_srcs onnx_asr_model.cc)
endif()

# XPU backend sources are taken from the sibling ../xpu directory.
if(XPU)
  list(APPEND decoder_srcs
    ../xpu/xpu_asr_model.cc
    ../xpu/xpu_conformer.cpp
    ../xpu/xpu_util.cpp
  )
  message(STATUS "xpu decoder_srcs is :: ${decoder_srcs} \n")
endif()

add_library(decoder STATIC ${decoder_srcs})
target_link_libraries(decoder PUBLIC kaldi-decoder frontend post_processor utils)
# Libraries linked by every decoder build; XPU builds additionally link
# xpuapi and xpurt, placed after the common libraries.
set(decoder_link_libs kaldi-decoder frontend post_processor utils)
if(XPU)
  list(APPEND decoder_link_libs xpuapi xpurt)
endif()
target_link_libraries(decoder PUBLIC ${decoder_link_libs})

if(ANDROID)
target_link_libraries(decoder PUBLIC ${PYTORCH_LIBRARY} ${FBJNI_LIBRARY})
Expand Down
26 changes: 23 additions & 3 deletions runtime/core/decoder/params.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.


#ifndef DECODER_PARAMS_H_
#define DECODER_PARAMS_H_

Expand All @@ -29,17 +28,24 @@
#ifdef USE_TORCH
#include "decoder/torch_asr_model.h"
#endif
#ifdef USE_XPU
#include "xpu/xpu_asr_model.h"
#endif
#include "frontend/feature_pipeline.h"
#include "post_processor/post_processor.h"
#include "utils/flags.h"
#include "utils/string.h"

DEFINE_int32(num_threads, 1, "num threads for ASR model");
DEFINE_int32(device_id, 0, "set XPU DeviceID for ASR model");

// TorchAsrModel flags
DEFINE_string(model_path, "", "pytorch exported model path");
// OnnxAsrModel flags
DEFINE_string(onnx_dir, "", "directory where the onnx model is saved");
// XPUAsrModel flags
DEFINE_string(xpu_model_dir, "",
"directory where the XPU model and weights is saved");

// FeaturePipelineConfig flags
DEFINE_int32(num_bins, 80, "num mel bins for fbank feature");
Expand All @@ -66,7 +72,8 @@ DEFINE_double(lattice_beam, 10.0, "lattice beam in ctc wfst search");
DEFINE_double(acoustic_scale, 1.0, "acoustic scale for ctc wfst search");
DEFINE_double(blank_skip_thresh, 1.0,
"blank skip thresh for ctc wfst search, 1.0 means no skip");
DEFINE_double(length_penalty, 0.0, "length penalty ctc wfst search, will not"
// Length penalty flag for the CTC WFST search. The help text is assembled from
// adjacent string literals, so each fragment must end with its own separating
// space; otherwise the words run together ("will notapply").
DEFINE_double(length_penalty, 0.0,
              "length penalty ctc wfst search, will not "
              "apply on self-loop arc, for balancing the del/ins ratio, "
              "suggest set to -3.0");
DEFINE_int32(nbest, 10, "nbest for ctc wfst or prefix search");
Expand Down Expand Up @@ -130,7 +137,7 @@ std::shared_ptr<DecodeResource> InitDecodeResourceFromFlags() {
#else
LOG(FATAL) << "Please rebuild with cmake options '-DONNX=ON'.";
#endif
} else {
} else if (!FLAGS_model_path.empty()) {
#ifdef USE_TORCH
LOG(INFO) << "Reading torch model " << FLAGS_model_path;
TorchAsrModel::InitEngineThreads(FLAGS_num_threads);
Expand All @@ -140,6 +147,19 @@ std::shared_ptr<DecodeResource> InitDecodeResourceFromFlags() {
#else
LOG(FATAL) << "Please rebuild with cmake options '-DTORCH=ON'.";
#endif
} else if (!FLAGS_xpu_model_dir.empty()) {
#ifdef USE_XPU
LOG(INFO) << "Reading XPU WeNet model weight from " << FLAGS_xpu_model_dir;
auto model = std::make_shared<XPUAsrModel>();
model->SetEngineThreads(FLAGS_num_threads);
model->SetDeviceId(FLAGS_device_id);
model->Read(FLAGS_xpu_model_dir);
resource->model = model;
#else
LOG(FATAL) << "Please rebuild with cmake options '-DXPU=ON'.";
#endif
} else {
LOG(FATAL) << "Please set ONNX, TORCH or XPU model path!!!";
}

LOG(INFO) << "Reading unit table " << FLAGS_unit_path;
Expand Down
2 changes: 2 additions & 0 deletions runtime/kunlun/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
build/
fc_base/
81 changes: 81 additions & 0 deletions runtime/kunlun/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

project(wenet VERSION 0.1)

# ---- Build switches ---------------------------------------------------------
# Inference backends. XPU is the only backend switched on by default here.
option(XPU "whether to build with XPU" ON)
option(TORCH "whether to build with Torch" OFF)
option(ONNX "whether to build with ONNX" OFF)
option(GPU "whether to build with GPU" OFF)
option(CXX11_ABI "whether to use CXX11_ABI libtorch" OFF)

# Optional network front ends.
option(GRPC "whether to build with gRPC" OFF)
# TODO(Binbin Zhang): Change websocket to OFF since it depends on boost
# which is a very big library
option(WEBSOCKET "whether to build with websocket" OFF)

# Extra binaries and tests.
option(FST_HAVE_BIN "whether to build fst binaries" OFF)
option(BUILD_TESTING "whether to build unit test" OFF)

set(CMAKE_VERBOSE_MAKEFILE OFF)

# FetchContent keeps its downloads under <source dir>/fc_base (symlinks
# resolved) and prints progress instead of running quietly.
include(FetchContent)
include(ExternalProject)
set(FETCHCONTENT_QUIET OFF)
get_filename_component(fc_base "fc_base" REALPATH BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
set(FETCHCONTENT_BASE_DIR ${fc_base})

# Let include(<name>) find the modules in the local cmake/ directory.
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)

# Toolchain-specific flags.
if(MSVC)
  set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
  add_compile_options("$<$<CXX_COMPILER_ID:MSVC>:/utf-8>")
else()
  # Keep the same with openfst, -fPIC or -fpic
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -pthread -fPIC")
endif()

# ---- Third-party dependencies -----------------------------------------------
# The include() order is preserved: each module runs at this point and may
# define variables or targets that later modules rely on.
include(libtorch)
if(ONNX)
  include(onnx)
endif()
if(XPU)
  include(xpu)
  # compile conformer_test
  add_subdirectory(xpu)
endif()
include(openfst)

# Header search roots for this directory and the subdirectories added below.
# NOTE(review): the xpu subdirectory is added before this call, so it
# presumably does not inherit these two paths - confirm that is intended.
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/kaldi)

# ---- Libraries --------------------------------------------------------------
add_subdirectory(utils)
if(NOT MSVC)
  # utils must not start building before the openfst target has been built.
  add_dependencies(utils openfst)
endif()
# Remaining libraries, in their original order (kaldi: wfst based decoder).
foreach(wenet_component frontend post_processor kaldi decoder api)
  add_subdirectory(${wenet_component})
endforeach()

# ---- Optional servers -------------------------------------------------------
# Optionally, you can build with websocket
if(WEBSOCKET)
  include(boost)
  add_subdirectory(websocket)
endif()

# Optionally, you can build with gRPC
if(GRPC)
  include(grpc)
  add_subdirectory(grpc)
endif()

# ---- Executables ------------------------------------------------------------
add_subdirectory(bin)

# ---- Unit tests -------------------------------------------------------------
if(BUILD_TESTING)
  include(gtest)
  add_subdirectory(test)
endif()
48 changes: 48 additions & 0 deletions runtime/kunlun/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# 在昆仑芯片上运行Wenet
## 介绍
下面的示例展示了如何在XPU上部署WeNet离线或在线的ASR模型。XPU是一种由昆仑芯100%自主研发的通用人工智能计算核心架构。

## 准备XPU运行环境

在开始之前,请确认您获得以下必须的环境。

XRE(XPU Runtime Environment):昆仑芯片的基础运行环境,包括芯片驱动程序、runtime api库、固件FW工具等功能模块。
XDNN(XPU Deep Neural Network Library):加速深度神经网络的昆仑芯片库,提供应用程序中使用的高性能DNN功能库。

如果您需要任何帮助,或是想要进一步了解昆仑芯片,请通过官方网址联系我们:
https://www.kunlunxin.com.cn/

## 操作步骤
- 第一步:构建,需要cmake 3.14及以上版本

``` sh
export CXX=${your_g++_path}
export CC=${your_gcc_path}
export XPU_API_PATH=${your_api_path}

# -r : release version; -d : debug version
bash ./compile.sh -r
```

- 第二步:测试,测试结果将在控制台输出

``` sh
## set KUNLUN XPU visible device
export XPU_VISIBLE_DEVICES=0
export XPUSIM_DEVICE_MODEL=KUNLUN2
## set logging level
export GLOG_logtostderr=1
export GLOG_v=3
## set speech wav and model/weight path
wav_path=${your_test_wav_path}
xpu_model_dir=${your_xpu_weight_dir}
units=${your_units.txt}
## executive command
# Expansions are quoted so that paths containing spaces reach decoder_main as
# single arguments; stdout and stderr are both copied to log.txt.
./build/bin/decoder_main \
    --chunk_size -1 \
    --wav_path "${wav_path}" \
    --xpu_model_dir "${xpu_model_dir}" \
    --unit_path "${units}" \
    --device_id 0 \
    --nbest 3 2>&1 | tee log.txt
```
52 changes: 52 additions & 0 deletions runtime/kunlun/README_EN.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# WeNet running on KUNLUNXIN XPU device
## Introduction
The example below shows how to deploy WeNet offline and online ASR models on XPUs.
XPU is a core architecture 100% independently developed by KUNLUNXIN for general artificial intelligence computing.

## Setup environment for XPU device

Before you start, make sure you have the following required environment:

XRE (XPU Runtime Environment): The basic operating environment of the XPUs,
which includes functional modules such as chip drivers, the runtime API library, and firmware tools.

XDNN(XPU Deep Neural Network Library): XPU library for accelerating deep neural networks, providing high-performance DNN function library used in applications.

If you would like to know more about XPUs or need any help, please contact us through the official website:

https://www.kunlunxin.com.cn/

## Instruction
- Step 1. Build, the build requires cmake 3.14 or above.

``` sh
export CXX=${your_g++_path}
export CC=${your_gcc_path}
export XPU_API_PATH=${your_api_path}

# -r : release version; -d : debug version
bash ./compile.sh -r
```

- Step 2. Testing, the result is shown in the console.

``` sh
## set KUNLUN XPU visible device
export XPU_VISIBLE_DEVICES=0
export XPUSIM_DEVICE_MODEL=KUNLUN2
## set logging level
export GLOG_logtostderr=1
export GLOG_v=3
## set speech wav and model/weight/units path
wav_path=${your_test_wav_path}
xpu_model_dir=${your_xpu_weight_dir}
units=${your_units.txt}
## executive command
# Expansions are quoted so that paths containing spaces reach decoder_main as
# single arguments; stdout and stderr are both copied to log.txt.
./build/bin/decoder_main \
    --chunk_size -1 \
    --wav_path "$wav_path" \
    --xpu_model_dir "$xpu_model_dir" \
    --unit_path "$units" \
    --device_id 0 \
    --nbest 3 2>&1 | tee log.txt
```
1 change: 1 addition & 0 deletions runtime/kunlun/api
1 change: 1 addition & 0 deletions runtime/kunlun/bin
1 change: 1 addition & 0 deletions runtime/kunlun/cmake
Loading

0 comments on commit 7fb4fea

Please sign in to comment.