learn git at branch dev #204

Open · wants to merge 3 commits into base: `dev`
2 changes: 2 additions & 0 deletions .gitignore
@@ -117,5 +117,7 @@ pretrained/*
dist_train.sh
openvino/build/*
openvino/output*
*.onnx
tis/cpp_client/build/*

tvm/
4 changes: 4 additions & 0 deletions README.md
@@ -37,6 +37,9 @@ You can go to [ncnn](./ncnn) for details.
3. openvino
You can go to [openvino](./openvino) for details.

4. tis
Triton Inference Server (TIS) provides a serving solution for model deployment. You can go to [tis](./tis) for details.


## platform

@@ -163,3 +166,4 @@ $ python tools/evaluate.py --config configs/bisenetv1_city.py --weight-path /pat
### Be aware that this is the refactored version of the original codebase. You can go to the `old` directory for the original implementation if you need it, though I believe you will not.


#let me see see dev branch
2 changes: 1 addition & 1 deletion openvino/README.md
@@ -12,7 +12,7 @@ My cpu is Intel(R) Xeon(R) Gold 6240 CPU @ 2.60GHz.
1.Train the model and export it to onnx
```
$ cd BiSeNet/
-$ python tools/export_onnx.py --aux-mode eval --config configs/bisenetv2_city.py --weight-path /path/to/your/model.pth --outpath ./model_v2.onnx
+$ python tools/export_onnx.py --config configs/bisenetv2_city.py --weight-path /path/to/your/model.pth --outpath ./model_v2.onnx
```
(Optional) 2.Install 'onnx-simplifier' to simplify the generated onnx model:
```
95 changes: 95 additions & 0 deletions tis/README.md
@@ -0,0 +1,95 @@


## A simple demo of using triton-inference-serving

### Platform

* ubuntu 18.04
* cmake-3.22.0
* 8 Tesla T4 GPUs


### Serving Model

#### 1. prepare model repository

We need to export our model to onnx and copy it into the model repository:
```
$ cd BiSeNet
$ python tools/export_onnx.py --config configs/bisenetv1_city.py --weight-path /path/to/your/model.pth --outpath ./model.onnx
$ cp -riv ./model.onnx tis/models/bisenetv1/1

$ python tools/export_onnx.py --config configs/bisenetv2_city.py --weight-path /path/to/your/model.pth --outpath ./model.onnx
$ cp -riv ./model.onnx tis/models/bisenetv2/1
```
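
Triton expects one directory per model, each holding a `config.pbtxt` and numbered version subdirectories. After the copies above, the repository should look roughly like this sketch (the `config.pbtxt` files already live in `tis/models`):
```
tis/models/
|-- bisenetv1/
|   |-- config.pbtxt
|   `-- 1/
|       `-- model.onnx
`-- bisenetv2/
    |-- config.pbtxt
    `-- 1/
        `-- model.onnx
```
Note that `model.onnx` is the default file name the onnxruntime backend looks for inside a version directory.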

#### 2. start service
We start serving with docker:
```
$ docker pull nvcr.io/nvidia/tritonserver:21.10-py3
$ docker run --gpus all --rm -p8000:8000 -p8001:8001 -p8002:8002 -v /path/to/BiSeNet/tis/models:/models nvcr.io/nvidia/tritonserver:21.10-py3 tritonserver --model-repository=/models
```

The service should now be running. You can check whether it has started by:
```
$ curl -v localhost:8000/v2/health/ready
```
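
If you prefer checking from python, a minimal sketch using the `tritonclient` package (the same one installed in the client section below) would be:
```
import tritonclient.http as httpclient

# talk to the http endpoint exposed on port 8000
client = httpclient.InferenceServerClient(url='localhost:8000')
print(client.is_server_ready())            # True once the server is up
print(client.is_model_ready('bisenetv2'))  # True once the model is loaded
```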

By default, we use gpu 0 and gpu 1; you can change this in the `config.pbtxt` files.
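
For illustration, a minimal `config.pbtxt` for the v2 model might look like the sketch below. The tensor names, dtypes, and shapes mirror what `tis/client.py` uses; treat the exact contents as an assumption, not a copy of the shipped file:
```
name: "bisenetv2"
platform: "onnxruntime_onnx"
max_batch_size: 0
input [
  {
    name: "input_image"
    data_type: TYPE_FP32
    dims: [ 1, 3, 1024, 2048 ]
  }
]
output [
  {
    name: "preds"
    data_type: TYPE_INT64
    dims: [ 1024, 2048 ]
  }
]
instance_group [
  {
    kind: KIND_GPU
    gpus: [ 0, 1 ]
  }
]
```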


### Client

We show how to call the model service from both python and c++.


#### 1. python method

First, we need to install the dependency package:
```
$ python -m pip install tritonclient[all]==2.15.0
```

Then we can run the script:
```
$ cd BiSeNet/tis
$ python client.py
```

This would generate a result file named `res.png` in the `BiSeNet/tis` directory.
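
`client.py` talks to the server through the raw `service_pb2` grpc stubs. For a quick call, the higher-level `tritonclient` API does the same job in a few lines; a minimal sketch with the model and tensor names used above (a random tensor stands in for a preprocessed image):
```
import numpy as np
import tritonclient.grpc as grpcclient

client = grpcclient.InferenceServerClient(url='127.0.0.1:8001')

# random input just to exercise the call; real usage would preprocess an image
inp = grpcclient.InferInput('input_image', [1, 3, 1024, 2048], 'FP32')
inp.set_data_from_numpy(np.random.rand(1, 3, 1024, 2048).astype(np.float32))
outp = grpcclient.InferRequestedOutput('preds')

res = client.infer(model_name='bisenetv2', inputs=[inp], outputs=[outp])
preds = res.as_numpy('preds')  # (1024, 2048) int64 class ids
print(preds.shape)
```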


#### 2. c++ method

We need to compile the c++ client library from source:
```
$ apt install rapidjson-dev
$ mkdir -p /data/ && cd /data/
$ git clone https://github.com/triton-inference-server/client.git
$ cd client && git reset --hard da04158bc094925a56b
$ mkdir -p build && cd build
$ cmake -DCMAKE_INSTALL_PREFIX=/opt/triton_client -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON -DTRITON_ENABLE_PERF_ANALYZER=OFF -DTRITON_ENABLE_PYTHON_HTTP=OFF -DTRITON_ENABLE_PYTHON_GRPC=OFF -DTRITON_ENABLE_JAVA_HTTP=OFF -DTRITON_ENABLE_GPU=ON -DTRITON_ENABLE_EXAMPLES=OFF -DTRITON_ENABLE_TESTS=ON ..
$ make cc-clients
```
The above commands are exactly what I used to compile the library; they follow the official documentation.

Also, we need to install `cmake` with version `3.22`.

Optionally, I compiled opencv from source and installed it to `/opt/opencv`. You can skip this at first and see whether you meet problems; if you have problems with opencv in the following steps, you can compile it from source as I did.

After installing the dependencies, we can compile our c++ client:
```
$ cd BiSeNet/tis/cpp_client
$ mkdir -p build && cd build
$ cmake .. && make
```

Finally, we run the client; a result file named `res.jpg` should be generated:
```
$ ./client
```


### In the end

This is a simple demo with only basic functionality. There are many other useful features, such as shared memory and model pipelines. If you are interested, you can learn more in the official documentation.
88 changes: 88 additions & 0 deletions tis/client.py
@@ -0,0 +1,88 @@

import numpy as np
import cv2

import grpc

from tritonclient.grpc import service_pb2, service_pb2_grpc
import tritonclient.grpc.model_config_pb2 as mc


np.random.seed(123)
palette = np.random.randint(0, 256, (100, 3))



# url = '10.128.61.7:8001'
url = '127.0.0.1:8001'  # grpc endpoint of the triton server
model_name = 'bisenetv2'
model_version = '1'
inp_name = 'input_image'  # must match the input name in config.pbtxt
outp_name = 'preds'  # must match the output name in config.pbtxt
inp_dtype = 'FP32'
outp_dtype = np.int64
inp_shape = [1, 3, 1024, 2048]
outp_shape = [1024, 2048]
impth = '../example.png'
mean = [0.3257, 0.3690, 0.3223] # city, rgb
std = [0.2112, 0.2148, 0.2115]


option = [  # allow grpc messages up to 1 GiB, for large image tensors
    ('grpc.max_receive_message_length', 1073741824),
    ('grpc.max_send_message_length', 1073741824),
]
channel = grpc.insecure_channel(url, options=option)
grpc_stub = service_pb2_grpc.GRPCInferenceServiceStub(channel)


metadata_request = service_pb2.ModelMetadataRequest(
name=model_name, version=model_version)
metadata_response = grpc_stub.ModelMetadata(metadata_request)
print(metadata_response)

config_request = service_pb2.ModelConfigRequest(
name=model_name,
version=model_version)
config_response = grpc_stub.ModelConfig(config_request)
print(config_response)


request = service_pb2.ModelInferRequest()
request.model_name = model_name
request.model_version = model_version

inp = service_pb2.ModelInferRequest().InferInputTensor()
inp.name = inp_name
inp.datatype = inp_dtype
inp.shape.extend(inp_shape)


mean = np.array(mean).reshape(1, 1, 3)
std = np.array(std).reshape(1, 1, 3)
im = cv2.imread(impth)[:, :, ::-1]  # BGR -> RGB
im = cv2.resize(im, dsize=tuple(inp_shape[-1:-3:-1]))  # cv2.resize takes (W, H)
im = ((im / 255.) - mean) / std  # normalize with the training statistics
im = im[None, ...].transpose(0, 3, 1, 2)  # HWC -> NCHW, add batch dim
inp_bytes = im.astype(np.float32).tobytes()

request.ClearField("inputs")
request.ClearField("raw_input_contents")
request.inputs.extend([inp,])
request.raw_input_contents.extend([inp_bytes,])


outp = service_pb2.ModelInferRequest().InferRequestedOutputTensor()
outp.name = outp_name
request.outputs.extend([outp,])

# sync
# resp = grpc_stub.ModelInfer(request).raw_output_contents[0]
# async
resp = grpc_stub.ModelInfer.future(request)
resp = resp.result().raw_output_contents[0]

out = np.frombuffer(resp, dtype=outp_dtype).reshape(*outp_shape)  # per-pixel class ids

out = palette[out]  # map class ids to colors for visualization
cv2.imwrite('res.png', out)
29 changes: 29 additions & 0 deletions tis/cpp_client/CMakeLists.txt
@@ -0,0 +1,29 @@
cmake_minimum_required (VERSION 3.18)

project(Samples)

set(CMAKE_CXX_FLAGS "-std=c++14 -O1")
set(CMAKE_BUILD_TYPE Release)

set(CMAKE_PREFIX_PATH
/opt/triton_client/
/opt/opencv/lib/cmake/opencv4)
find_package(OpenCV REQUIRED)

include_directories(
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR}
${OpenCV_INCLUDE_DIRS}
/opt/triton_client/include
)
link_directories(
/opt/triton_client/lib
)


add_executable(client main.cpp)
target_link_libraries(client PRIVATE
grpcclient
${OpenCV_LIBS}
-lpthread
)