diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f91b760e920..b901f41c29f2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,6 +77,7 @@ option(USE_JEMALLOC "Build with Jemalloc support" OFF) option(USE_LIBJPEG_TURBO "Use libjpeg-turbo" OFF) option(USE_DIST_KVSTORE "Build with DIST_KVSTORE support" OFF) option(USE_PLUGINS_WARPCTC "Use WARPCTC Plugins" OFF) +option(USE_CPP_PACKAGE "Build C++ Package" OFF) option(USE_MXNET_LIB_NAMING "Use MXNet library naming conventions." ON) option(USE_GPROF "Compile with gprof (profiling) flag" OFF) option(USE_VTUNE "Enable use of Intel Amplifier XE (VTune)" OFF) # one could set VTUNE_ROOT for search path @@ -296,6 +297,10 @@ if(USE_ONEDNN) set_target_properties(dnnl PROPERTIES CXX_CLANG_TIDY "") # don't lint 3rdparty dependency endif() +if(USE_CPP_PACKAGE) + add_definitions(-DMXNET_USE_CPP_PACKAGE=1) +endif() + if(USE_INTGEMM) message(STATUS "Using intgemm") add_subdirectory(3rdparty/intgemm EXCLUDE_FROM_ALL) @@ -960,6 +965,11 @@ if(INSTALL_PYTHON_VERSIONS) endforeach() endif() +if(USE_CPP_PACKAGE) + add_subdirectory(cpp-package) + target_compile_definitions(mxnet PUBLIC MXNET_USE_CPP_PACKAGE=1) +endif() + if(NOT CMAKE_BUILD_TYPE STREQUAL "Distribution") # Staticbuild applies linker version script to hide private symbols, breaking unit tests add_subdirectory(tests) diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index e53a57bde3ec..974215524795 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -625,12 +625,18 @@ build_ubuntu_gpu_onednn_nocudnn() { build_ubuntu_gpu() { set -ex cd /work/build + # Work around to link libcuda to libmxnet + # should be removed after https://github.com/apache/incubator-mxnet/issues/17858 is resolved. 
+ ln -s -f /usr/local/cuda/targets/x86_64-linux/lib/stubs/libcuda.so libcuda.so.1 + export LIBRARY_PATH=${LIBRARY_PATH}:/work/build + export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/work/build CC=gcc-7 CXX=g++-7 cmake \ -DCMAKE_BUILD_TYPE="RelWithDebInfo" \ -DUSE_CUDA=ON \ -DUSE_NVML=OFF \ -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \ -DUSE_CUDNN=ON \ + -DUSE_CPP_PACKAGE=ON \ -DUSE_BLAS=Open \ -DUSE_ONEDNN=OFF \ -DUSE_DIST_KVSTORE=ON \ @@ -707,7 +713,7 @@ sanity_license() { sanity_cpp() { set -ex - 3rdparty/dmlc-core/scripts/lint.py mxnet cpp include src plugin tests --exclude_path src/operator/contrib/ctc_include include/onednn + 3rdparty/dmlc-core/scripts/lint.py mxnet cpp include src plugin cpp-package tests --exclude_path src/operator/contrib/ctc_include include/onednn } sanity_python() { @@ -878,6 +884,12 @@ unittest_centos7_gpu() { pytest --durations=50 --cov-report xml:tests_gpu.xml --cov-append --verbose tests/python/gpu/test_amp_init.py } +integrationtest_ubuntu_cpp_package_gpu() { + set -ex + export DMLC_LOG_STACK_TRACE_DEPTH=10 + cpp-package/tests/ci_test.sh +} + integrationtest_ubuntu_cpu_onnx() { set -ex export PYTHONPATH=./python/ @@ -1159,9 +1171,11 @@ build_docs() { pushd docs/_build tar -xzf jekyll-artifacts.tgz python_doc_folder='html/api/python/docs' + api_folder='html/api' # Python has it's own landing page/site so we don't put it in /docs/api mkdir -p $python_doc_folder && tar -xzf python-artifacts.tgz --directory $python_doc_folder + mkdir -p $api_folder/cpp/docs/api && tar -xzf c-artifacts.tgz --directory $api_folder/cpp/docs/api # check if .htaccess file exists if [ ! 
-f "html/.htaccess" ]; then @@ -1210,7 +1224,9 @@ build_docs_beta() { pushd docs/_build tar -xzf jekyll-artifacts.tgz python_doc_folder="html/versions/$BRANCH/api/python/docs" + cpp_doc_folder="html/versions/$BRANCH/api/cpp/docs" mkdir -p $python_doc_folder && tar -xzf python-artifacts.tgz --directory $python_doc_folder + mkdir -p $cpp_doc_folder && tar -xzf c-artifacts.tgz --directory $cpp_doc_folder GZIP=-9 tar -zcvf beta_website.tgz -C html . popd } diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy index ae3786297f46..d3ad36142325 100644 --- a/ci/jenkins/Jenkins_steps.groovy +++ b/ci/jenkins/Jenkins_steps.groovy @@ -34,9 +34,9 @@ mx_cmake_lib_cython = 'build/libmxnet.so, build/3rdparty/tvm/libtvm_runtime.so, mx_cmake_lib_debug = 'build/libmxnet.so, build/3rdparty/tvm/libtvm_runtime.so, build/libtvmop.so, build/tvmop.conf, build/libcustomop_lib.so, build/libcustomop_gpu_lib.so, build/libsubgraph_lib.so, build/tests/mxnet_unit_tests' mx_onednn_lib = 'build/libmxnet.so, build/3rdparty/tvm/libtvm_runtime.so, build/libtvmop.so, build/tvmop.conf, build/libcustomop_lib.so, build/libcustomop_gpu_lib.so, build/libsubgraph_lib.so, example/extensions/lib_external_ops/build/libexternal_lib.so' mx_tensorrt_lib = 'build/libmxnet.so, build/3rdparty/tvm/libtvm_runtime.so, build/libtvmop.so, build/tvmop.conf, lib/libnvonnxparser_runtime.so.0, lib/libnvonnxparser.so.0, lib/libonnx_proto.so, lib/libonnx.so' -mx_lib_cpp_examples = 'build/libmxnet.so, build/3rdparty/tvm/libtvm_runtime.so, build/libtvmop.so, build/tvmop.conf, build/libcustomop_lib.so, build/libcustomop_gpu_lib.so, build/libsubgraph_lib.so, example/extensions/lib_external_ops/build/libexternal_lib.so, python/mxnet/_cy3/*.so, python/mxnet/_ffi/_cy3/*.so' +mx_lib_cpp_examples = 'build/libmxnet.so, build/3rdparty/tvm/libtvm_runtime.so, build/libtvmop.so, build/tvmop.conf, build/libcustomop_lib.so, build/libcustomop_gpu_lib.so, build/libsubgraph_lib.so, 
example/extensions/lib_external_ops/build/libexternal_lib.so, build/cpp-package/example/*, python/mxnet/_cy3/*.so, python/mxnet/_ffi/_cy3/*.so' mx_lib_cpp_examples_no_tvm_op = 'build/libmxnet.so, build/libcustomop_lib.so, build/libcustomop_gpu_lib.so, build/libsubgraph_lib.so, python/mxnet/_cy3/*.so, python/mxnet/_ffi/_cy3/*.so' -mx_lib_cpp_examples_cpu = 'build/libmxnet.so, build/3rdparty/tvm/libtvm_runtime.so, build/libtvmop.so, build/tvmop.conf' +mx_lib_cpp_examples_cpu = 'build/libmxnet.so, build/3rdparty/tvm/libtvm_runtime.so, build/libtvmop.so, build/tvmop.conf, build/cpp-package/example/*' mx_cd_lib = 'lib/libmxnet.so, licenses/*, lib/libgfortran.so.*, lib/libopenblas.so.0, include/onednn/oneapi/dnnl/dnnl_version.h, include/onednn/oneapi/dnnl/dnnl_config.h' @@ -85,7 +85,7 @@ def compile_unix_cpu_openblas(lib_name) { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() utils.docker_run('ubuntu_cpu', 'build_ubuntu_cpu_openblas', false) - utils.pack_lib(lib_name, mx_lib_cython, true) + utils.pack_lib(lib_name, mx_lib_cpp_examples, true) } } } @@ -846,6 +846,20 @@ def test_unix_distributed_kvstore_gpu(lib_name) { }] } +def test_unix_cpp_package_gpu(lib_name) { + return ['cpp-package GPU Makefile': { + node(NODE_LINUX_GPU_G4) { + ws('workspace/it-cpp-package-gpu') { + timeout(time: max_time, unit: 'MINUTES') { + utils.unpack_and_init(lib_name, mx_lib_cpp_examples) + utils.docker_run('ubuntu_gpu_cu111', 'integrationtest_ubuntu_cpp_package_gpu', true) + utils.publish_test_coverage() + } + } + } + }] +} + def test_centos7_python3_cpu(lib_name) { return ['Python3: CentOS 7 CPU': { node(NODE_LINUX_CPU) { @@ -1048,6 +1062,23 @@ def docs_python(lib_name) { }] } +// Call this function from Jenkins to generate just the C and C++ API microsite artifacts. 
+def docs_c(lib_name) { + return ['C Docs': { + node(NODE_LINUX_CPU) { + ws('workspace/docs') { + timeout(time: max_time, unit: 'MINUTES') { + utils.unpack_and_init(lib_name, mx_lib, false) + utils.docker_run('ubuntu_cpu', 'build_c_docs', false) + if (should_pack_website()) { + utils.pack_lib('c-artifacts', 'docs/_build/c-artifacts.tgz', false) + } + } + } + } + }] +} + // Call this function from Jenkins to generate just the main website artifacts. def docs_jekyll() { @@ -1078,6 +1109,7 @@ def docs_prepare() { utils.init_git() unstash 'jekyll-artifacts' + unstash 'c-artifacts' unstash 'python-artifacts' utils.docker_run('ubuntu_cpu_jekyll', 'build_docs', false) @@ -1104,6 +1136,7 @@ def docs_full_website() { utils.init_git() unstash 'jekyll-artifacts' + unstash 'c-artifacts' unstash 'python-artifacts' utils.docker_run('ubuntu_cpu_jekyll', 'build_docs', false) @@ -1126,6 +1159,7 @@ def docs_prepare_beta() { utils.init_git() unstash 'jekyll-artifacts' + unstash 'c-artifacts' unstash 'python-artifacts' utils.docker_run('ubuntu_cpu_jekyll', 'build_docs_beta', false) diff --git a/ci/jenkins/Jenkinsfile_unix_gpu b/ci/jenkins/Jenkinsfile_unix_gpu index ef385af2078d..2beb0f4aa1f4 100644 --- a/ci/jenkins/Jenkinsfile_unix_gpu +++ b/ci/jenkins/Jenkinsfile_unix_gpu @@ -46,6 +46,7 @@ core_logic: { custom_steps.test_unix_python3_gpu('gpu'), custom_steps.test_unix_python3_onednn_gpu('onednn_gpu'), custom_steps.test_unix_python3_onednn_nocudnn_gpu('onednn_gpu_nocudnn'), + custom_steps.test_unix_cpp_package_gpu('gpu'), // TODO(szha): fix and reenable the hanging issue. 
tracked in #18098 // custom_steps.test_unix_distributed_kvstore_gpu('gpu'), custom_steps.test_unix_byteps_gpu('gpu'), diff --git a/ci/jenkins/Jenkinsfile_website_full b/ci/jenkins/Jenkinsfile_website_full index 03b576d47cb3..d2de41132d72 100644 --- a/ci/jenkins/Jenkinsfile_website_full +++ b/ci/jenkins/Jenkinsfile_website_full @@ -40,6 +40,7 @@ core_logic: { utils.parallel_stage('Build Docs', [ custom_steps.docs_jekyll(), + custom_steps.docs_c('libmxnet'), custom_steps.docs_python('libmxnet'), ]) diff --git a/ci/jenkins/Jenkinsfile_website_full_pr b/ci/jenkins/Jenkinsfile_website_full_pr index 3b0c0964f439..7ac880fc9127 100644 --- a/ci/jenkins/Jenkinsfile_website_full_pr +++ b/ci/jenkins/Jenkinsfile_website_full_pr @@ -40,6 +40,7 @@ core_logic: { utils.parallel_stage('Build Docs', [ // Optimization would be to flag these not to stash if not previewing them custom_steps.docs_jekyll(), + custom_steps.docs_c('libmxnet'), custom_steps.docs_python('libmxnet'), ]) diff --git a/ci/jenkins/Jenkinsfile_website_nightly b/ci/jenkins/Jenkinsfile_website_nightly index f180f0ac6c7c..6fa5d1a9396f 100644 --- a/ci/jenkins/Jenkinsfile_website_nightly +++ b/ci/jenkins/Jenkinsfile_website_nightly @@ -40,6 +40,7 @@ core_logic: { utils.parallel_stage('Build Docs', [ custom_steps.docs_jekyll(), + custom_steps.docs_c('libmxnet'), custom_steps.docs_python('libmxnet'), ]) diff --git a/ci/jenkins/Jenkinsfile_website_version_artifacts b/ci/jenkins/Jenkinsfile_website_version_artifacts index 7f74f1a9e076..01daa05210b8 100644 --- a/ci/jenkins/Jenkinsfile_website_version_artifacts +++ b/ci/jenkins/Jenkinsfile_website_version_artifacts @@ -40,6 +40,7 @@ core_logic: { utils.parallel_stage('Build Docs', [ custom_steps.docs_jekyll(), + custom_steps.docs_c('libmxnet'), custom_steps.docs_python('libmxnet'), ]) diff --git a/cpp-package/CMakeLists.txt b/cpp-package/CMakeLists.txt new file mode 100644 index 000000000000..2a168e7c824c --- /dev/null +++ b/cpp-package/CMakeLists.txt @@ -0,0 +1,52 @@ +# 
Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +cmake_minimum_required(VERSION 3.13) +project(mxnet_cpp C CXX) + +add_library(mxnet_cpp INTERFACE) + +set(CPP_PACKAGE_INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}/include/) +target_include_directories(mxnet_cpp INTERFACE "${CPP_PACKAGE_INCLUDE_DIR}") +file(GLOB_RECURSE CPP_PACKAGE_HEADERS + "${CPP_PACKAGE_INCLUDE_DIR}/*.h" + "${CPP_PACKAGE_INCLUDE_DIR}/*.hpp") +set(CPP_PACKAGE_OP_H_HEADER ${CMAKE_CURRENT_LIST_DIR}/include/mxnet-cpp/op.h) +target_sources(mxnet_cpp INTERFACE ${CPP_PACKAGE_HEADERS} ${CPP_PACKAGE_OP_H_HEADER}) +target_link_libraries(mxnet_cpp INTERFACE mxnet ${mxnet_LINKER_LIBS}) + +add_custom_target( + cpp_package_op_h ALL + BYPRODUCTS ${CPP_PACKAGE_OP_H_HEADER} + MAIN_DEPENDENCY mxnet + DEPENDS mxnet ${CMAKE_CURRENT_SOURCE_DIR}/scripts/OpWrapperGenerator.py + COMMAND echo "Running: OpWrapperGenerator.py" + COMMAND python3 OpWrapperGenerator.py $ + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/scripts +) +add_dependencies(mxnet_cpp cpp_package_op_h) + +if(MSVC) + target_compile_options(mxnet_cpp INTERFACE "/utf-8") +endif(MSVC) + +if(BUILD_CPP_EXAMPLES) + add_subdirectory(example) + add_subdirectory(example/inference) +endif() + +install(DIRECTORY include/ DESTINATION 
${CMAKE_INSTALL_INCLUDEDIR}) diff --git a/cpp-package/README.md b/cpp-package/README.md new file mode 100644 index 000000000000..90808688077f --- /dev/null +++ b/cpp-package/README.md @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + +# MXNet - C++ API + +The MXNet C++ Package provides C++ API bindings to the users of MXNet. Currently, these bindings are not available as standalone package. +The users of these bindings are required to build this package as mentioned below. + +## Building C++ Package + +The cpp-package directory contains the implementation of C++ API. Users are required to build this directory or package before using it. +**The cpp-package is built while building the MXNet shared library, *libmxnet.so*, with *USE\_CPP\_PACKAGE* option turned on. Please follow the steps to build the C++ package** + +### Steps to build the C++ package: +1. Building the MXNet C++ package requires building MXNet from source. +2. Clone the MXNet GitHub repository **recursively** to ensure the code in submodules is available for building MXNet. + ``` + git clone --recursive https://github.com/apache/incubator-mxnet mxnet + ``` + +3. Install the [recommended dependencies](https://mxnet.apache.org/versions/master/get_started/build_from_source.html#installing-mxnet's-recommended-dependencies) and [optional dependencies](https://mxnet.apache.org/versions/master/get_started/build_from_source.html#overview-of-optional-dependencies-and-optional-features) for building MXNet from source. +4. There is a configuration file for cmake, [config/*.cmake]() that contains all the compilation options. You can edit this file and set the appropriate options prior to running the **cmake** command. +5. Please refer to [cmake configuration files](https://github.com/apache/incubator-mxnet/blob/970a2cfbe77d09ee610fdd70afca1a93247cf4fb/config/linux_gpu.cmake#L18-L37) for more details on how to configure and compile MXNet. +6. 
To enable the build of the C++ Package, pass **-DUSE\_CPP\_PACKAGE=1** in the cmake options. + +### Cross-Compilation steps: +1. Build the C++ package for the **host** platform to generate the op.h file. +2. Remove the following line in [CMakeLists.txt](). + ``` + COMMAND python3 OpWrapperGenerator.py $<TARGET_FILE:mxnet> + ``` +3. Re-configure cmake for cross-compilation to build the **target** C++ package. + +## Usage + +In order to consume the C++ API please follow the steps below. + +1. Ensure that the MXNet shared library is built from source with **USE\_CPP\_PACKAGE=1**. +2. Include the [MxNetCpp.h]() in the program that is going to consume the MXNet C++ API. + ```c++ + #include <mxnet-cpp/MxNetCpp.h> + ``` +3. While building the program, ensure that the correct paths to the directories containing the header files and the MXNet shared library are specified. +4. The program links the MXNet shared library dynamically. Hence the library needs to be accessible to the program during runtime. This can be achieved by including the path to the shared library in the environment variable **LD\_LIBRARY\_PATH** for Linux, Mac and Ubuntu OS and **PATH** for Windows OS. + + +## Tutorial + +A basic tutorial can be found at . + +## Examples + +The example directory contains examples for you to get started. Please build the MXNet C++ Package before building the examples. diff --git a/cpp-package/example/CMakeLists.txt b/cpp-package/example/CMakeLists.txt new file mode 100644 index 000000000000..ed37668d7011 --- /dev/null +++ b/cpp-package/example/CMakeLists.txt @@ -0,0 +1,79 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Explicitly set GENERATED property https://gitlab.kitware.com/cmake/cmake/issues/18399 +set_property(SOURCE ${CMAKE_CURRENT_LIST_DIR}/../include/mxnet-cpp/op.h PROPERTY GENERATED 1) + +add_executable(lenet lenet.cpp) +target_link_libraries(lenet mxnet_cpp) + +add_executable(lenet_with_mxdataiter lenet_with_mxdataiter.cpp) +target_link_libraries(lenet_with_mxdataiter mxnet_cpp) + +add_executable(alexnet alexnet.cpp) +target_link_libraries(alexnet mxnet_cpp) + +add_executable(charRNN charRNN.cpp) +target_link_libraries(charRNN mxnet_cpp) + +add_executable(googlenet googlenet.cpp) +target_link_libraries(googlenet mxnet_cpp) + +add_executable(inception_bn inception_bn.cpp) +target_link_libraries(inception_bn mxnet_cpp) + +add_executable(mlp mlp.cpp) +target_link_libraries(mlp mxnet_cpp) + +add_executable(mlp_cpu mlp_cpu.cpp) +target_link_libraries(mlp_cpu mxnet_cpp) + +add_executable(mlp_gpu mlp_gpu.cpp) +target_link_libraries(mlp_gpu mxnet_cpp) + +add_executable(resnet resnet.cpp) +target_link_libraries(resnet mxnet_cpp) + +add_executable(test_optimizer test_optimizer.cpp) +target_link_libraries(test_optimizer mxnet_cpp) + +add_executable(test_ndarray_copy test_ndarray_copy.cpp) +target_link_libraries(test_ndarray_copy mxnet_cpp) + +add_executable(test_score test_score.cpp) +target_link_libraries(test_score mxnet_cpp) + +add_executable(mlp_csv mlp_csv.cpp) +target_link_libraries(mlp_csv mxnet_cpp) + +add_executable(test_kvstore test_kvstore.cpp) +target_link_libraries(test_kvstore mxnet_cpp) + +add_executable(test_regress_label test_regress_label.cpp) 
+target_link_libraries(test_regress_label mxnet_cpp) + +add_executable(sentiment_analysis_rnn ./inference/sentiment_analysis_rnn.cpp) +target_link_libraries(sentiment_analysis_rnn mxnet_cpp) + +add_executable(multi_threaded_inference ./inference/multi_threaded_inference/multi_threaded_inference.cc) +target_link_libraries(multi_threaded_inference mxnet_cpp) + +if(MSVC) + add_custom_target(cpp_package_deploy_library ALL + DEPENDS mxnet + COMMAND ${CMAKE_COMMAND} -E copy $ $) +endif() diff --git a/cpp-package/example/README.md b/cpp-package/example/README.md new file mode 100644 index 000000000000..65c967e0cb04 --- /dev/null +++ b/cpp-package/example/README.md @@ -0,0 +1,119 @@ + + + + + + + + + + + + + + + + + +# MXNet C++ Package Examples + +## Building C++ examples + +The examples in this folder demonstrate the **training** workflow. The **inference workflow** related examples can be found in [inference]() folder. +Please build the MXNet C++ Package as explained in the [README]() File. +The examples in this folder are built while building the MXNet library and cpp-package from source. You can get the executable files by just copying them from ```incubator-mxnet/build/cpp-package/example``` + +The examples that are built to be run on GPU may not work on the non-GPU machines. + +## Examples demonstrating training workflow + +This directory contains following examples. In order to run the examples, ensure that the path to the MXNet shared library is added to the OS specific environment variable viz. **LD\_LIBRARY\_PATH** for Linux, Mac and Ubuntu OS and **PATH** for Windows OS. For example `export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/home/ubuntu/incubator-mxnet/build` on ubuntu using gpu. + +### [alexnet.cpp]() + +The example implements the C++ version of AlexNet. The networks trains on MNIST data. The number of epochs can be specified as a command line argument. 
For example to train with 10 epochs use the following: + +``` +build/alexnet 10 +``` + +### [googlenet.cpp]() + +The code implements a GoogLeNet/Inception network using the C++ API. The example uses MNIST data to train the network. By default, the example trains the model for 100 epochs. The number of epochs can also be specified in the command line. For example, to train the model for 10 epochs use the following: + +``` +build/googlenet 10 +``` + +### [mlp.cpp]() + +The code implements a multilayer perceptron from scratch. The example creates its own dummy data to train the model. The example does not require command line parameters. It trains the model for 20,000 epochs. +To run the example use the following command: + +``` +build/mlp +``` + +### [mlp_cpu.cpp]() + +The code implements a multilayer perceptron to train the MNIST data. The code demonstrates the use of "SimpleBind" C++ API and MNISTIter. The example is designed to work on CPU. The example does not require command line parameters. +To run the example use the following command: + +``` +build/mlp_cpu +``` + +### [mlp_gpu.cpp]() + +The code implements a multilayer perceptron to train the MNIST data. The code demonstrates the use of the "SimpleBind" C++ API and MNISTIter. The example is designed to work on GPU. The example does not require command line arguments. To run the example execute following command: + +``` +build/mlp_gpu +``` + +### [mlp_csv.cpp]() + +The code implements a multilayer perceptron to train the MNIST data. The code demonstrates the use of the "SimpleBind" C++ API and CSVIter. The CSVIter can iterate data that is in CSV format. The example can be run on CPU or GPU. 
The example usage is as follows: + +``` +build/mlp_csv --train data/mnist_data/mnist_train.csv --test data/mnist_data/mnist_test.csv --epochs 10 --batch_size 100 --hidden_units "128 64 64" --gpu +``` +* To get the `mnist_train.csv` and `mnist_test.csv` files, please run the following commands: +```python +# in incubator-mxnet/cpp-package/example directory +python mnist_to_csv.py ./data/mnist_data/train-images-idx3-ubyte ./data/mnist_data/train-labels-idx1-ubyte ./data/mnist_data/mnist_train.csv 60000 +python mnist_to_csv.py ./data/mnist_data/t10k-images-idx3-ubyte ./data/mnist_data/t10k-labels-idx1-ubyte ./data/mnist_data/mnist_test.csv 10000 +``` + +### [resnet.cpp]() + +The code implements a resnet model using the C++ API. The model is trained on MNIST data. The number of epochs for training the model can be specified on the command line. By default, the model is trained for 100 epochs. For example, to train with 10 epochs use the following command: + +``` +build/resnet 10 +``` + +### [lenet.cpp]() + +The code implements a lenet model using the C++ API. It uses MNIST training data in CSV format to train the network. The example does not use the built-in CSVIter to read the data from the CSV file. The number of epochs can be specified on the command line. By default, the model is trained for 100,000 epochs. For example, to train with 10 epochs use the following command: + +``` +build/lenet 10 +``` +### [lenet\_with\_mxdataiter.cpp]() + +The code implements a lenet model using the C++ API. It uses MNIST training data to train the network. The example uses the built-in MNISTIter to read the data. The number of epochs can be specified on the command line. By default, the model is trained for 100 epochs. For example, to train with 10 epochs use the following command: + +``` +build/lenet_with_mxdataiter 10 +``` + +In addition, there is `run_lenet_with_mxdataiter.sh`, which downloads the MNIST data and runs the `lenet_with_mxdataiter` example.
+ +### [inception_bn.cpp]() + +The code implements an Inception network using the C++ API with batch normalization. The example uses MNIST data to train the network. The model trains for 100 epochs. The example can be run by executing the following command: + +``` +build/inception_bn +``` diff --git a/cpp-package/example/alexnet.cpp b/cpp-package/example/alexnet.cpp new file mode 100644 index 000000000000..00bd1c592eed --- /dev/null +++ b/cpp-package/example/alexnet.cpp @@ -0,0 +1,358 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ */ +#include +#include +#include +#include +#include +#include "utils.h" +#include "mxnet-cpp/MxNetCpp.h" + +using namespace mxnet::cpp; +/* Builds the AlexNet symbol graph: five Convolution layers and three FullyConnected layers ending in a SoftmaxOutput with num_classes outputs; the graph reads the variables "data" and "label". */ +Symbol AlexnetSymbol(int num_classes) { + auto input_data = Symbol::Variable("data"); + auto target_label = Symbol::Variable("label"); + /*stage 1: 11x11 conv (96 filters, stride 4) -> relu -> 3x3 max pool (stride 2) -> LRN*/ + auto conv1 = Operator("Convolution") + .SetParam("kernel", Shape(11, 11)) + .SetParam("num_filter", 96) + .SetParam("stride", Shape(4, 4)) + .SetParam("dilate", Shape(1, 1)) + .SetParam("pad", Shape(0, 0)) + .SetParam("num_group", 1) + .SetParam("workspace", 512) + .SetParam("no_bias", false) + .SetInput("data", input_data) + .CreateSymbol("conv1"); + auto relu1 = Operator("Activation") + .SetParam("act_type", "relu") /*relu,sigmoid,softrelu,tanh */ + .SetInput("data", conv1) + .CreateSymbol("relu1"); + auto pool1 = Operator("Pooling") + .SetParam("kernel", Shape(3, 3)) + .SetParam("pool_type", "max") /*avg,max,sum */ + .SetParam("global_pool", false) + .SetParam("stride", Shape(2, 2)) + .SetParam("pad", Shape(0, 0)) + .SetInput("data", relu1) + .CreateSymbol("pool1"); + auto lrn1 = Operator("LRN") + .SetParam("nsize", 5) + .SetParam("alpha", 0.0001) + .SetParam("beta", 0.75) + .SetParam("knorm", 1) + .SetInput("data", pool1) + .CreateSymbol("lrn1"); + /*stage 2: 5x5 conv (256 filters, pad 2) -> relu -> 3x3 max pool (stride 2) -> LRN*/ + auto conv2 = Operator("Convolution") + .SetParam("kernel", Shape(5, 5)) + .SetParam("num_filter", 256) + .SetParam("stride", Shape(1, 1)) + .SetParam("dilate", Shape(1, 1)) + .SetParam("pad", Shape(2, 2)) + .SetParam("num_group", 1) + .SetParam("workspace", 512) + .SetParam("no_bias", false) + .SetInput("data", lrn1) + .CreateSymbol("conv2"); + auto relu2 = Operator("Activation") + .SetParam("act_type", "relu") /*relu,sigmoid,softrelu,tanh */ + .SetInput("data", conv2) + .CreateSymbol("relu2"); + auto pool2 = Operator("Pooling") + .SetParam("kernel", Shape(3, 3)) + .SetParam("pool_type", "max") /*avg,max,sum */ + .SetParam("global_pool", false) + .SetParam("stride", Shape(2, 2)) + .SetParam("pad", Shape(0, 0)) +
.SetInput("data", relu2) + .CreateSymbol("pool2"); + auto lrn2 = Operator("LRN") + .SetParam("nsize", 5) + .SetParam("alpha", 0.0001) + .SetParam("beta", 0.75) + .SetParam("knorm", 1) + .SetInput("data", pool2) + .CreateSymbol("lrn2"); + /*stage 3: three 3x3 convs (384, 384, 256 filters, pad 1), each followed by relu, then 3x3 max pool (stride 2)*/ + auto conv3 = Operator("Convolution") + .SetParam("kernel", Shape(3, 3)) + .SetParam("num_filter", 384) + .SetParam("stride", Shape(1, 1)) + .SetParam("dilate", Shape(1, 1)) + .SetParam("pad", Shape(1, 1)) + .SetParam("num_group", 1) + .SetParam("workspace", 512) + .SetParam("no_bias", false) + .SetInput("data", lrn2) + .CreateSymbol("conv3"); + auto relu3 = Operator("Activation") + .SetParam("act_type", "relu") /*relu,sigmoid,softrelu,tanh */ + .SetInput("data", conv3) + .CreateSymbol("relu3"); + auto conv4 = Operator("Convolution") + .SetParam("kernel", Shape(3, 3)) + .SetParam("num_filter", 384) + .SetParam("stride", Shape(1, 1)) + .SetParam("dilate", Shape(1, 1)) + .SetParam("pad", Shape(1, 1)) + .SetParam("num_group", 1) + .SetParam("workspace", 512) + .SetParam("no_bias", false) + .SetInput("data", relu3) + .CreateSymbol("conv4"); + auto relu4 = Operator("Activation") + .SetParam("act_type", "relu") /*relu,sigmoid,softrelu,tanh */ + .SetInput("data", conv4) + .CreateSymbol("relu4"); + auto conv5 = Operator("Convolution") + .SetParam("kernel", Shape(3, 3)) + .SetParam("num_filter", 256) + .SetParam("stride", Shape(1, 1)) + .SetParam("dilate", Shape(1, 1)) + .SetParam("pad", Shape(1, 1)) + .SetParam("num_group", 1) + .SetParam("workspace", 512) + .SetParam("no_bias", false) + .SetInput("data", relu4) + .CreateSymbol("conv5"); + auto relu5 = Operator("Activation") + .SetParam("act_type", "relu") + .SetInput("data", conv5) + .CreateSymbol("relu5"); + auto pool3 = Operator("Pooling") + .SetParam("kernel", Shape(3, 3)) + .SetParam("pool_type", "max") + .SetParam("global_pool", false) + .SetParam("stride", Shape(2, 2)) + .SetParam("pad", Shape(0, 0)) + .SetInput("data", relu5) + .CreateSymbol("pool3"); + /*stage4: flatten -> fully connected (4096 hidden) -> relu -> dropout (p = 0.5)*/ +
auto flatten = + Operator("Flatten").SetInput("data", pool3).CreateSymbol("flatten"); + auto fc1 = Operator("FullyConnected") + .SetParam("num_hidden", 4096) + .SetParam("no_bias", false) + .SetInput("data", flatten) + .CreateSymbol("fc1"); + auto relu6 = Operator("Activation") + .SetParam("act_type", "relu") + .SetInput("data", fc1) + .CreateSymbol("relu6"); + auto dropout1 = Operator("Dropout") + .SetParam("p", 0.5) + .SetInput("data", relu6) + .CreateSymbol("dropout1"); + /*stage5: fully connected (4096 hidden) -> relu -> dropout (p = 0.5)*/ + auto fc2 = Operator("FullyConnected") + .SetParam("num_hidden", 4096) + .SetParam("no_bias", false) + .SetInput("data", dropout1) + .CreateSymbol("fc2"); + auto relu7 = Operator("Activation") + .SetParam("act_type", "relu") + .SetInput("data", fc2) + .CreateSymbol("relu7"); + auto dropout2 = Operator("Dropout") + .SetParam("p", 0.5) + .SetInput("data", relu7) + .CreateSymbol("dropout2"); + /*stage6: fully connected (num_classes hidden) -> SoftmaxOutput fed by the "label" variable*/ + auto fc3 = Operator("FullyConnected") + .SetParam("num_hidden", num_classes) + .SetParam("no_bias", false) + .SetInput("data", dropout2) + .CreateSymbol("fc3"); + auto softmax = Operator("SoftmaxOutput") + .SetParam("grad_scale", 1) + .SetParam("ignore_label", -1) + .SetParam("multi_output", false) + .SetParam("use_ignore", false) + .SetParam("normalization", "null") /*batch,null,valid */ + .SetInput("data", fc3) + .SetInput("label", target_label) + .CreateSymbol("softmax"); + return softmax; +} +/* Reshapes data to Shape(0, 1, 28, 28), resizes it with _contrib_BilinearResize2D to height new_shape[2] and width new_shape[3], then applies "tile" with reps (1, 3, 1, 1) and returns the result. NOTE(review): the tile presumably replicates the single channel into three to match the network input - confirm. */ +NDArray ResizeInput(NDArray data, const Shape new_shape) { + NDArray pic = data.Reshape(Shape(0, 1, 28, 28)); + NDArray pic_1channel; + Operator("_contrib_BilinearResize2D") + .SetParam("height", new_shape[2]) + .SetParam("width", new_shape[3]) + (pic).Invoke(pic_1channel); + NDArray output; + Operator("tile") + .SetParam("reps", Shape(1, 3, 1, 1)) + (pic_1channel).Invoke(output); + return output; +} + +int main(int argc, char const *argv[]) { + /*basic config*/ + int max_epo = argc > 1 ?
strtol(argv[1], nullptr, 10) : 100; + float learning_rate = 1e-4; + float weight_decay = 1e-4; + + /*context*/ + auto ctx = Context::cpu(); + int num_gpu; + MXGetGPUCount(&num_gpu); + int batch_size = 32; +#if MXNET_USE_CUDA + if (num_gpu > 0) { + ctx = Context::gpu(); + batch_size = 256; + } +#endif + + TRY + /*net symbol*/ + auto Net = AlexnetSymbol(10); + + /*args_map and aux_map are used for saving parameters*/ + std::map args_map; + std::map aux_map; + + /*we should tell mxnet the shape of data and label*/ + const Shape data_shape = Shape(batch_size, 3, 256, 256), + label_shape = Shape(batch_size); + args_map["data"] = NDArray(data_shape, ctx); + args_map["label"] = NDArray(label_shape, ctx); + + /*with data and label, executor can be generated automatically*/ + auto *exec = Net.SimpleBind(ctx, args_map); + auto arg_names = Net.ListArguments(); + aux_map = exec->aux_dict(); + args_map = exec->arg_dict(); + + /*if fine tune from some pre-trained model, we should load the parameters*/ + // NDArray::Load("./model/alex_params_3", nullptr, &args_map); + /*else, we should use initializer Uniform to init the params*/ + auto initializer = Uniform(0.07); + for (auto &arg : args_map) { + /*be careful here, the arg's name must have some specific ends or starts for + * initializer to call*/ + initializer(arg.first, &arg.second); + } + + /*these binary files should be generated using im2rc tools, which can be found + * in mxnet/bin*/ + std::vector data_files = { "./data/mnist_data/train-images-idx3-ubyte", + "./data/mnist_data/train-labels-idx1-ubyte", + "./data/mnist_data/t10k-images-idx3-ubyte", + "./data/mnist_data/t10k-labels-idx1-ubyte" + }; + + auto train_iter = MXDataIter("MNISTIter"); + if (!setDataIter(&train_iter, "Train", data_files, batch_size)) { + return 1; + } + + auto val_iter = MXDataIter("MNISTIter"); + if (!setDataIter(&val_iter, "Label", data_files, batch_size)) { + return 1; + } + + Optimizer* opt = OptimizerRegistry::Find("sgd"); + 
opt->SetParam("momentum", 0.9) + ->SetParam("rescale_grad", 1.0 / batch_size) + ->SetParam("clip_gradient", 10) + ->SetParam("lr", learning_rate) + ->SetParam("wd", weight_decay); + + Accuracy acu_train, acu_val; + LogLoss logloss_train, logloss_val; + for (int epoch = 0; epoch < max_epo; ++epoch) { + LG << "Train Epoch: " << epoch; + /*reset the metric every epoch*/ + acu_train.Reset(); + /*reset the data iter every epoch*/ + train_iter.Reset(); + int iter = 0; + while (train_iter.Next()) { + auto batch = train_iter.GetDataBatch(); + /*use copyto to feed new data and label to the executor*/ + ResizeInput(batch.data, data_shape).CopyTo(&args_map["data"]); + batch.label.CopyTo(&args_map["label"]); + exec->Forward(true); + exec->Backward(); + for (size_t i = 0; i < arg_names.size(); ++i) { + if (arg_names[i] == "data" || arg_names[i] == "label") continue; + opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]); + } + + NDArray::WaitAll(); + acu_train.Update(batch.label, exec->outputs[0]); + logloss_train.Reset(); + logloss_train.Update(batch.label, exec->outputs[0]); + ++iter; + LG << "EPOCH: " << epoch << " ITER: " << iter + << " Train Accuracy: " << acu_train.Get() + << " Train Loss: " << logloss_train.Get(); + } + LG << "EPOCH: " << epoch << " Train Accuracy: " << acu_train.Get(); + + LG << "Val Epoch: " << epoch; + acu_val.Reset(); + val_iter.Reset(); + logloss_val.Reset(); + iter = 0; + while (val_iter.Next()) { + auto batch = val_iter.GetDataBatch(); + ResizeInput(batch.data, data_shape).CopyTo(&args_map["data"]); + batch.label.CopyTo(&args_map["label"]); + exec->Forward(false); + NDArray::WaitAll(); + acu_val.Update(batch.label, exec->outputs[0]); + logloss_val.Update(batch.label, exec->outputs[0]); + LG << "EPOCH: " << epoch << " ITER: " << iter << " Val Accuracy: " << acu_val.Get(); + ++iter; + } + LG << "EPOCH: " << epoch << " Val Accuracy: " << acu_val.Get(); + LG << "EPOCH: " << epoch << " Val LogLoss: " << logloss_val.Get(); + + /*save the 
parameters*/ + std::stringstream ss; + ss << epoch; + std::string epoch_str; + ss >> epoch_str; + std::string save_path_param = "alex_param_" + epoch_str; + auto save_args = args_map; + /*we do not want to save the data and label*/ + save_args.erase(save_args.find("data")); + save_args.erase(save_args.find("label")); + /*the alexnet does not get any aux array, so we do not need to save + * aux_map*/ + LG << "EPOCH: " << epoch << " Saving to..." << save_path_param; + NDArray::Save(save_path_param, save_args); + } + /*don't forget to release the executor*/ + delete exec; + delete opt; + MXNotifyShutdown(); + CATCH + return 0; +} diff --git a/cpp-package/example/charRNN.cpp b/cpp-package/example/charRNN.cpp new file mode 100644 index 000000000000..0b87abf72343 --- /dev/null +++ b/cpp-package/example/charRNN.cpp @@ -0,0 +1,759 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Hua Zhang mz24cn@hotmail.com + * The code implements C++ version charRNN for mxnet\example\rnn\char-rnn.ipynb with MXNet.cpp API. + * The generated params file is compatible with python version. + * train() and predict() have been verified with original data samples. 
+ * 2017/1/23: + * Add faster version charRNN based on built-in cuDNN RNN operator, 10 times faster. + * Add time major computation graph, although no substantial performance difference. + * Support continuing training from last params file. + * Rename params file epoch number starts from zero. + */ + +#if _MSC_VER +#pragma warning(disable: 4996) // VS2015 complains on 'std::copy' ... +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mxnet-cpp/MxNetCpp.h" +#include "utils.h" + +using namespace mxnet::cpp; + +struct LSTMState { + Symbol C; + Symbol h; +}; + +struct LSTMParam { + Symbol i2h_weight; + Symbol i2h_bias; + Symbol h2h_weight; + Symbol h2h_bias; +}; + +bool TIME_MAJOR = true; + +// LSTM Cell symbol +LSTMState LSTM(int num_hidden, const Symbol& indata, const LSTMState& prev_state, + const LSTMParam& param, int seqidx, int layeridx, mx_float dropout = 0) { + auto input = dropout > 0? Dropout(indata, dropout) : indata; + auto prefix = std::string("t") + std::to_string(seqidx) + "_l" + std::to_string(layeridx); + auto i2h = FullyConnected(prefix + "_i2h", input, param.i2h_weight, param.i2h_bias, + num_hidden * 4); + auto h2h = FullyConnected(prefix + "_h2h", prev_state.h, param.h2h_weight, param.h2h_bias, + num_hidden * 4); + auto gates = i2h + h2h; + auto slice_gates = SliceChannel(prefix + "_slice", gates, 4); + auto in_gate = Activation(slice_gates[0], ActivationActType::kSigmoid); + auto in_transform = Activation(slice_gates[1], ActivationActType::kTanh); + auto forget_gate = Activation(slice_gates[2], ActivationActType::kSigmoid); + auto out_gate = Activation(slice_gates[3], ActivationActType::kSigmoid); + + LSTMState state; + state.C = (forget_gate * prev_state.C) + (in_gate * in_transform); + state.h = out_gate * Activation(state.C, ActivationActType::kTanh); + return state; +} + +Symbol LSTMUnroll(int num_lstm_layer, int sequence_length, int input_dim, + int num_hidden, int 
num_embed, mx_float dropout = 0) { + auto isTrain = sequence_length > 1; + auto data = Symbol::Variable("data"); + if (TIME_MAJOR && isTrain) + data = transpose(data); + auto embed_weight = Symbol::Variable("embed_weight"); + auto embed = Embedding("embed", data, embed_weight, input_dim, num_embed); + auto wordvec = isTrain? SliceChannel(embed, sequence_length, TIME_MAJOR? 0 : 1, true) : embed; + + std::vector last_states; + std::vector param_cells; + for (int l = 0; l < num_lstm_layer; l++) { + std::string layer = "l" + std::to_string(l); + LSTMParam param; + param.i2h_weight = Symbol::Variable(layer + "_i2h_weight"); + param.i2h_bias = Symbol::Variable(layer + "_i2h_bias"); + param.h2h_weight = Symbol::Variable(layer + "_h2h_weight"); + param.h2h_bias = Symbol::Variable(layer + "_h2h_bias"); + param_cells.push_back(param); + LSTMState state; + state.C = Symbol::Variable(layer + "_init_c"); + state.h = Symbol::Variable(layer + "_init_h"); + last_states.push_back(state); + } + + std::vector hidden_all; + for (int i = 0; i < sequence_length; i++) { + auto hidden = wordvec[i]; + for (int layer = 0; layer < num_lstm_layer; layer++) { + double dp_ratio = layer == 0? 0 : dropout; + auto next_state = LSTM(num_hidden, hidden, last_states[layer], param_cells[layer], + i, layer, dp_ratio); + hidden = next_state.h; + last_states[layer] = next_state; + } + if (dropout > 0) + hidden = Dropout(hidden, dropout); + hidden_all.push_back(hidden); + } + + auto hidden_concat = isTrain? 
Concat(hidden_all, hidden_all.size(), 0) : hidden_all[0]; + auto cls_weight = Symbol::Variable("cls_weight"); + auto cls_bias = Symbol::Variable("cls_bias"); + auto pred = FullyConnected("pred", hidden_concat, cls_weight, cls_bias, input_dim); + + auto label = Symbol::Variable("softmax_label"); + label = transpose(label); + label = Reshape(label, Shape(), false, Shape(0), false); // -1: infer from graph + auto sm = SoftmaxOutput("softmax", pred, label); + if (isTrain) + return sm; + + std::vector outputs = { sm }; + for (auto& state : last_states) { + outputs.push_back(state.C); + outputs.push_back(state.h); + } + return Symbol::Group(outputs); +} + +// Currently mxnet GPU version RNN operator is implemented via *fast* NVIDIA cuDNN. +Symbol LSTMWithBuiltInRNNOp(int num_lstm_layer, int sequence_length, int input_dim, + int num_hidden, int num_embed, mx_float dropout = 0) { + auto isTrain = sequence_length > 1; + auto data = Symbol::Variable("data"); + if (TIME_MAJOR && isTrain) + data = transpose(data); + + auto embed_weight = Symbol::Variable("embed_weight"); + auto embed = Embedding("embed", data, embed_weight, input_dim, num_embed); + auto label = Symbol::Variable("softmax_label"); + label = transpose(label); + label = Reshape(label, Shape(), false, + Shape(0), false); // FullyConnected requires one dimension + if (!TIME_MAJOR && isTrain) + embed = SwapAxis(embed, 0, 1); // Change to time-major as cuDNN requires + + // We need not do the SwapAxis op as python version does. Direct and better performance in C++! 
+ auto rnn_h_init = Symbol::Variable("LSTM_init_h"); + auto rnn_c_init = Symbol::Variable("LSTM_init_c"); + auto rnn_params = Symbol::Variable("LSTM_parameters"); // See explanations near RNNXavier class + auto variable_sequence_length = Symbol::Variable("sequence_length"); + auto rnn = RNN(embed, rnn_params, rnn_h_init, rnn_c_init, variable_sequence_length, num_hidden, + num_lstm_layer, RNNMode::kLstm, false, dropout, !isTrain); + auto hidden = Reshape(rnn[0], Shape(), false, Shape(0, num_hidden), false); + + auto cls_weight = Symbol::Variable("cls_weight"); + auto cls_bias = Symbol::Variable("cls_bias"); + auto pred = FullyConnected("pred", hidden, cls_weight, cls_bias, input_dim); + /*In rnn-time-major/rnn_cell_demo.py, the author claimed time-major version speeds up + * 1.5~2 times versus batch version. I doubt that conclusion. In my test, the performance + * of both codes is almost the same. In fact, there are no substantial differences between + * two codes. They are both based on time major cuDNN, the computation graph only differs + * slightly on the choices of where to put Reshape/SwapAxis/transpose operation. Here I don't + * use Reshape on pred and keep label shape on SoftmaxOutput like time major version code, + * but Reshape on label for simplification. It has no influence on performance. 
*/ + + auto sm = SoftmaxOutput("softmax", pred, label); + if (isTrain) + return sm; + else + return Symbol::Group({ sm, rnn[1/*RNNOpOutputs::kStateOut=1*/], + rnn[2/*RNNOpOutputs::kStateCellOut=2*/] }); +} + +class Shuffler { + std::vector sequence; + public: + explicit Shuffler(int size) : sequence(size) { + int* p = sequence.data(); + for (int i = 0; i < size; i++) + *p++ = i; + } + void shuffle(std::function lambda = nullptr) { + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(sequence.begin(), sequence.end(), g); + int n = 0; + if (lambda != nullptr) + for (int i : sequence) + lambda(n++, i); + } + const int* data() { + return sequence.data(); + } +}; + +class BucketSentenceIter : public DataIter { + Shuffler* random; + int batch, current, end; + unsigned int sequence_length; + Context device; + std::vector> sequences; + std::vector index2chars; + std::unordered_map charIndices; + + public: + BucketSentenceIter(std::string filename, int minibatch, Context context) : batch(minibatch), + current(-1), device(context) { + auto content = readContent(filename); + buildCharIndex(content); + sequences = convertTextToSequences(content, '\n'); + + int N = sequences.size() / batch * batch; // total used samples + sequences.resize(N); + sort(sequences.begin(), sequences.end(), [](const std::vector& a, + const std::vector& b) { return a.size() < b.size(); }); + + sequence_length = sequences.back().size(); + random = new Shuffler(N); + // We still can get random results if call Reset() firstly +// std::vector>* target = &sequences; +// random->shuffle([target](int n, int i) { (*target)[n].swap((*target)[i]); }); + end = N / batch; + } + virtual ~BucketSentenceIter() { + delete random; + } + + unsigned int maxSequenceLength() { + return sequence_length; + } + + size_t characterSize() { + return charIndices.size(); + } + + virtual bool Next(void) { + return ++current < end; + } + virtual NDArray GetData(void) { + const int* indices = random->data(); + mx_float 
*data = new mx_float[sequence_length * batch], *pdata = data; + + for (int i = current * batch, end = i + batch; i < end; i++) { + memcpy(pdata, sequences[indices[i]].data(), sequences[indices[i]].size() * sizeof(mx_float)); + if (sequences[indices[i]].size() < sequence_length) + memset(pdata + sequences[indices[i]].size(), 0, + (sequence_length - sequences[indices[i]].size()) * sizeof(mx_float)); + pdata += sequence_length; + } + NDArray array(Shape(batch, sequence_length), device, false); + array.SyncCopyFromCPU(data, batch * sequence_length); + return array; /* NOTE(review): data (new[]) is never delete[]d here - leak? */ + } + virtual NDArray GetLabel(void) { + const int* indices = random->data(); + mx_float *label = new mx_float[sequence_length * batch], *plabel = label; + + for (int i = current * batch, end = i + batch; i < end; i++) { + memcpy(plabel, sequences[indices[i]].data() + 1, + (sequences[indices[i]].size() - 1) * sizeof(mx_float)); + memset(plabel + sequences[indices[i]].size() - 1, 0, + (sequence_length - sequences[indices[i]].size() + 1) * sizeof(mx_float)); + plabel += sequence_length; + } + NDArray array(Shape(batch, sequence_length), device, false); + array.SyncCopyFromCPU(label, batch * sequence_length); + return array; /* NOTE(review): label (new[]) is never delete[]d here - leak? */ + } + virtual int GetPadNum(void) { + return sequence_length - sequences[random->data()[current * batch]].size(); + } + virtual std::vector GetIndex(void) { + const int* indices = random->data(); + std::vector list(indices + current * batch, indices + current * batch + batch); + return list; + } + virtual void BeforeFirst(void) { + current = -1; + random->shuffle(nullptr); + } + + std::wstring readContent(const std::string file) { + std::wifstream ifs(file, std::ios::binary); + if (ifs) { + std::wostringstream os; + os << ifs.rdbuf(); + return os.str(); + } + return L""; + } + + void buildCharIndex(const std::wstring& content) { + // This buildCharIndex() is compatible with the python version char_rnn dictionary + int n = 1; + charIndices['\0'] = 0; // padding character + 
index2chars.push_back(0); // padding character index + for (auto c : content) + if (charIndices.find(c) == charIndices.end()) { + charIndices[c] = n++; + index2chars.push_back(c); + } + } +// void buildCharIndex(wstring& content) { +// for (auto c : content) +// charIndices[c]++; // char-frequency map; then char-index map +// std::vector> characters; +// for (auto& iter : charIndices) +// characters.push_back(make_tuple(iter.first, iter.second)); +// sort(characters.begin(), characters.end(), [](const tuple& a, +// const tuple& b) { return get<1>(a) > get<1>(b); }); +// mx_float index = 1; //0 is left for zero-padding +// index2chars.clear(); +// index2chars.push_back(0); //zero-padding +// for (auto& t : characters) { +// charIndices[get<0>(t)] = index++; +// index2chars.push_back(get<0>(t)); +// }s +// } + + inline wchar_t character(int i) { + return index2chars[i]; + } + + inline mx_float index(wchar_t c) { + return charIndices[c]; + } + + void saveCharIndices(const std::string file) { + std::wofstream ofs(file, std::ios::binary); + if (ofs) { + ofs.write(index2chars.data() + 1, index2chars.size() - 1); + ofs.close(); + } + } + + static std::tuple, std::vector> loadCharIndices( + const std::string file) { + std::wifstream ifs(file, std::ios::binary); + std::unordered_map map; + std::vector chars; + if (ifs) { + std::wostringstream os; + os << ifs.rdbuf(); + int n = 1; + map[L'\0'] = 0; + chars.push_back(L'\0'); + for (auto c : os.str()) { + map[c] = (mx_float) n++; + chars.push_back(c); + } + } + // Note: Can't use {} because this would hit the explicit constructor + return std::tuple, std::vector>(map, chars); + } + + std::vector> + convertTextToSequences(const std::wstring& content, wchar_t spliter) { + std::vector> sequences; + sequences.push_back(std::vector()); + for (auto c : content) + if (c == spliter && !sequences.back().empty()) + sequences.push_back(std::vector()); + else + sequences.back().push_back(charIndices[c]); + return sequences; + } +}; + 
+void OutputPerplexity(NDArray* labels, NDArray* output) { + std::vector charIndices, a; + labels->SyncCopyToCPU(&charIndices, 0L); // 0L indicates all + output->SyncCopyToCPU(&a, 0L)/*4128*84*/; + mx_float loss = 0; + int batchSize = labels->GetShape()[0]/*32*/, sequenceLength = labels->GetShape()[1]/*129*/, + nSamples = output->GetShape()[0]/*4128*/, vocabSize = output->GetShape()[1]/*84*/; + for (int n = 0; n < nSamples; n++) { + int row = n % batchSize, column = n / batchSize, labelOffset = column + + row * sequenceLength; // Search based on column storage: labels.T + mx_float safe_value = std::max(1e-10f, a[vocabSize * n + + static_cast(charIndices[labelOffset])]); + loss += -log(safe_value); // Calculate negative log-likelihood + } + loss = exp(loss / nSamples); + std::cout << "Train-Perplexity=" << loss << std::endl; +} + +void SaveCheckpoint(const std::string filepath, Symbol net, Executor* exe) { + std::map params; + for (auto iter : exe->arg_dict()) + if (iter.first.find("_init_") == std::string::npos + && iter.first.rfind("data") != iter.first.length() - 4 + && iter.first.rfind("label") != iter.first.length() - 5) + params.insert({"arg:" + iter.first, iter.second}); + for (auto iter : exe->aux_dict()) + params.insert({"aux:" + iter.first, iter.second}); + NDArray::Save(filepath, params); +} + +void LoadCheckpoint(const std::string filepath, Executor* exe) { + std::map params = NDArray::LoadToMap(filepath); + for (auto iter : params) { + std::string type = iter.first.substr(0, 4); + std::string name = iter.first.substr(4); + NDArray target; + if (type == "arg:") + target = exe->arg_dict()[name]; + else if (type == "aux:") + target = exe->aux_dict()[name]; + else + continue; + iter.second.CopyTo(&target); + } +} + +int input_dim = 0;/*84*/ +int sequence_length_max = 0;/*129*/ +int num_embed = 256; +int num_lstm_layer = 3; +int num_hidden = 512; +mx_float dropout = 0.2; +void train(const std::string file, int batch_size, int max_epoch, int start_epoch) { + 
Context device(DeviceType::kGPU, 0); + BucketSentenceIter dataIter(file, batch_size, device); + std::string prefix = file.substr(0, file.rfind(".")); + dataIter.saveCharIndices(prefix + ".dictionary"); + + input_dim = static_cast(dataIter.characterSize()); + sequence_length_max = dataIter.maxSequenceLength(); + + auto RNN = LSTMUnroll(num_lstm_layer, sequence_length_max, input_dim, num_hidden, + num_embed, dropout); + std::map args_map; + args_map["data"] = NDArray(Shape(batch_size, sequence_length_max), device, false); + args_map["softmax_label"] = NDArray(Shape(batch_size, sequence_length_max), device, false); + for (int i = 0; i < num_lstm_layer; i++) { + std::string key = "l" + std::to_string(i) + "_init_"; + args_map[key + "c"] = NDArray(Shape(batch_size, num_hidden), device, false); + args_map[key + "h"] = NDArray(Shape(batch_size, num_hidden), device, false); + } + std::vector zeros(batch_size * num_hidden, 0); + // RNN.SimpleBind(device, args_map, {}, {{"data", kNullOp}}); + Executor* exe = RNN.SimpleBind(device, args_map); + + if (start_epoch == -1) { + auto initializer = Uniform(0.07); + for (auto &arg : exe->arg_dict()) { + initializer(arg.first, &arg.second); + } + } else { + LoadCheckpoint(prefix + "-" + std::to_string(start_epoch) + ".params", exe); + } + start_epoch++; + + mx_float learning_rate = 0.0002; + mx_float weight_decay = 0.000002; + Optimizer* opt = OptimizerRegistry::Find("sgd"); + opt->SetParam("lr", learning_rate) + ->SetParam("wd", weight_decay); +// opt->SetParam("momentum", 0.9)->SetParam("rescale_grad", 1.0 / batch_size) +// ->SetParam("clip_gradient", 10); + + for (int epoch = start_epoch; epoch < max_epoch; ++epoch) { + dataIter.Reset(); + auto tic = std::chrono::system_clock::now(); + while (dataIter.Next()) { + auto data_batch = dataIter.GetDataBatch(); + data_batch.data.CopyTo(&exe->arg_dict()["data"]); + data_batch.label.CopyTo(&exe->arg_dict()["softmax_label"]); + for (int l = 0; l < num_lstm_layer; l++) { + std::string key = 
"l" + std::to_string(l) + "_init_"; + exe->arg_dict()[key + "c"].SyncCopyFromCPU(zeros); + exe->arg_dict()[key + "h"].SyncCopyFromCPU(zeros); + } + NDArray::WaitAll(); + + exe->Forward(true); + exe->Backward(); + for (size_t i = 0; i < exe->arg_arrays.size(); ++i) { + opt->Update(i, exe->arg_arrays[i], exe->grad_arrays[i]); + } + + NDArray::WaitAll(); + } + auto toc = std::chrono::system_clock::now(); + std::cout << "Epoch[" << epoch << "] Time Cost:" << + std::chrono::duration_cast< std::chrono::seconds>(toc - tic).count() << " seconds "; + OutputPerplexity(&exe->arg_dict()["softmax_label"], &exe->outputs[0]); + std::string filepath = prefix + "-" + std::to_string(epoch) + ".params"; + SaveCheckpoint(filepath, RNN, exe); + } + + delete exe; + delete opt; +} + +/*The original example, rnn_cell_demo.py, uses default Xavier as initializer, which relies on + * variable name and cannot initialize LSTM_parameters. Thus it was renamed to LSTM_bias, + * which can be initialized as zero. But it cannot converge after 100 epochs in this corpus + * example. Using RNNXavier, after 15 oscillating epochs, it rapidly converges like old + * LSTMUnroll version. 
*/ +class RNNXavier : public Xavier { + public: + RNNXavier(RandType rand_type = gaussian, FactorType factor_type = avg, + float magnitude = 3) : Xavier(rand_type, factor_type, magnitude) { + } + virtual ~RNNXavier() {} + protected: + virtual void InitDefault(NDArray* arr) { + Xavier::InitWeight(arr); + } +}; + +void trainWithBuiltInRNNOp(const std::string file, int batch_size, int max_epoch, int start_epoch) { + Context device(DeviceType::kGPU, 0); + BucketSentenceIter dataIter(file, batch_size, device); + std::string prefix = file.substr(0, file.rfind(".")); + dataIter.saveCharIndices(prefix + ".dictionary"); + + input_dim = static_cast(dataIter.characterSize()); + sequence_length_max = dataIter.maxSequenceLength(); + + auto RNN = LSTMWithBuiltInRNNOp(num_lstm_layer, sequence_length_max, input_dim, num_hidden, + num_embed, dropout); + std::map args_map; + args_map["data"] = NDArray(Shape(batch_size, sequence_length_max), device, false); + // Avoiding SwapAxis, batch_size is of second dimension. 
+ args_map["LSTM_init_c"] = NDArray(Shape(num_lstm_layer, batch_size, num_hidden), device, false); + args_map["LSTM_init_h"] = NDArray(Shape(num_lstm_layer, batch_size, num_hidden), device, false); + args_map["softmax_label"] = NDArray(Shape(batch_size, sequence_length_max), device, false); + std::vector zeros(batch_size * num_lstm_layer * num_hidden, 0); + Executor* exe = RNN.SimpleBind(device, args_map); + + if (start_epoch == -1) { + RNNXavier xavier = RNNXavier(Xavier::gaussian, Xavier::in, 2.34); + for (auto &arg : exe->arg_dict()) + xavier(arg.first, &arg.second); + } else { + LoadCheckpoint(prefix + "-" + std::to_string(start_epoch) + ".params", exe); + } + start_epoch++; + + Optimizer* opt = OptimizerRegistry::Find("ccsgd"); +// opt->SetParam("momentum", 0.9)->SetParam("rescale_grad", 1.0 / batch_size) +// ->SetParam("clip_gradient", 10); + + for (int epoch = start_epoch; epoch < max_epoch; ++epoch) { + dataIter.Reset(); + auto tic = std::chrono::system_clock::now(); + while (dataIter.Next()) { + auto data_batch = dataIter.GetDataBatch(); + data_batch.data.CopyTo(&exe->arg_dict()["data"]); + data_batch.label.CopyTo(&exe->arg_dict()["softmax_label"]); + exe->arg_dict()["LSTM_init_c"].SyncCopyFromCPU(zeros); + exe->arg_dict()["LSTM_init_h"].SyncCopyFromCPU(zeros); + NDArray::WaitAll(); + + exe->Forward(true); + exe->Backward(); + for (size_t i = 0; i < exe->arg_arrays.size(); ++i) { + opt->Update(i, exe->arg_arrays[i], exe->grad_arrays[i]); + } + NDArray::WaitAll(); + } + auto toc = std::chrono::system_clock::now(); + std::cout << "Epoch[" << epoch << "] Time Cost:" << + std::chrono::duration_cast< std::chrono::seconds>(toc - tic).count() << " seconds "; + OutputPerplexity(&exe->arg_dict()["softmax_label"], &exe->outputs[0]); + std::string filepath = prefix + "-" + std::to_string(epoch) + ".params"; + SaveCheckpoint(filepath, RNN, exe); + } + + delete exe; + delete opt; +} + +void predict(std::wstring* ptext, int sequence_length, const std::string param_file, 
+ const std::string dictionary_file) { + Context device(DeviceType::kGPU, 0); + auto results = BucketSentenceIter::loadCharIndices(dictionary_file); + auto dictionary = std::get<0>(results); + auto charIndices = std::get<1>(results); + input_dim = static_cast(charIndices.size()); + auto RNN = LSTMUnroll(num_lstm_layer, 1, input_dim, num_hidden, num_embed, 0); + + std::map args_map; + args_map["data"] = NDArray(Shape(1, 1), device, false); + args_map["softmax_label"] = NDArray(Shape(1, 1), device, false); + std::vector zeros(1 * num_hidden, 0); + for (int l = 0; l < num_lstm_layer; l++) { + std::string key = "l" + std::to_string(l) + "_init_"; + args_map[key + "c"] = NDArray(Shape(1, num_hidden), device, false); + args_map[key + "h"] = NDArray(Shape(1, num_hidden), device, false); + args_map[key + "c"].SyncCopyFromCPU(zeros); + args_map[key + "h"].SyncCopyFromCPU(zeros); + } + Executor* exe = RNN.SimpleBind(device, args_map); + LoadCheckpoint(param_file, exe); + + mx_float index; + wchar_t next = 0; + std::vector softmax; + softmax.resize(input_dim); + for (auto c : *ptext) { + exe->arg_dict()["data"].SyncCopyFromCPU(&dictionary[c], 1); + exe->Forward(false); + + exe->outputs[0].SyncCopyToCPU(softmax.data(), input_dim); + for (int l = 0; l < num_lstm_layer; l++) { + std::string key = "l" + std::to_string(l) + "_init_"; + exe->outputs[l * 2 + 1].CopyTo(&args_map[key + "c"]); + exe->outputs[l * 2 + 2].CopyTo(&args_map[key + "h"]); + } + + size_t n = max_element(softmax.begin(), softmax.end()) - softmax.begin(); + index = (mx_float) n; + next = charIndices[n]; + } + ptext->push_back(next); + + for (int i = 0; i < sequence_length; i++) { + exe->arg_dict()["data"].SyncCopyFromCPU(&index, 1); + exe->Forward(false); + + exe->outputs[0].SyncCopyToCPU(softmax.data(), input_dim); + for (int l = 0; l < num_lstm_layer; l++) { + std::string key = "l" + std::to_string(l) + "_init_"; + exe->outputs[l * 2 + 1].CopyTo(&args_map[key + "c"]); + exe->outputs[l * 2 + 
2].CopyTo(&args_map[key + "h"]); + } + + size_t n = max_element(softmax.begin(), softmax.end()) - softmax.begin(); + index = (mx_float) n; + next = charIndices[n]; + ptext->push_back(next); + } + + delete exe; +} +
/*!
 * Greedy character-level text generation with the network returned by
 * LSTMWithBuiltInRNNOp, bound on GPU 0 with batch size 1.
 *
 * Every character already in *ptext is fed through the network one at a time
 * (updating the LSTM_init_h / LSTM_init_c arrays after each step); the arg-max
 * of the last softmax output is appended to *ptext, followed by
 * sequence_length further arg-max predictions.
 *
 * \param ptext            in/out: prompt on entry, prompt + generated text on return
 * \param sequence_length  number of characters generated after the first prediction
 * \param param_file       checkpoint handed to LoadCheckpoint
 * \param dictionary_file  file handed to BucketSentenceIter::loadCharIndices
 *
 * Side effect: overwrites the file-level variable input_dim.
 */
void predictWithBuiltInRNNOp(std::wstring* ptext, int sequence_length, const std::string param_file,
                             const std::string dictionary_file) {
  Context device(DeviceType::kGPU, 0);
  auto results = BucketSentenceIter::loadCharIndices(dictionary_file);
  auto dictionary = std::get<0>(results);
  auto charIndices = std::get<1>(results);
  input_dim = static_cast<int>(charIndices.size());
  auto RNN = LSTMWithBuiltInRNNOp(num_lstm_layer, 1, input_dim, num_hidden, num_embed, 0);

  std::map<std::string, NDArray> args_map;
  args_map["data"] = NDArray(Shape(1, 1), device, false);
  args_map["softmax_label"] = NDArray(Shape(1, 1), device, false);
  std::vector<mx_float> zeros(1 * num_lstm_layer * num_hidden, 0);
  // Avoiding SwapAxis, batch_size=1 is of second dimension.
  args_map["LSTM_init_c"] = NDArray(Shape(num_lstm_layer, 1, num_hidden), device, false);
  args_map["LSTM_init_h"] = NDArray(Shape(num_lstm_layer, 1, num_hidden), device, false);
  args_map["LSTM_init_c"].SyncCopyFromCPU(zeros);
  args_map["LSTM_init_h"].SyncCopyFromCPU(zeros);
  Executor* exe = RNN.SimpleBind(device, args_map);
  LoadCheckpoint(param_file, exe);

  std::vector<mx_float> softmax;
  softmax.resize(input_dim);

  // One inference step shared by the priming and the generation loop: feed a
  // single input value, copy the output states back into the init-state
  // arrays, and return the arg-max of the softmax output.
  auto step = [&](const mx_float* input) -> size_t {
    exe->arg_dict()["data"].SyncCopyFromCPU(input, 1);
    exe->Forward(false);

    exe->outputs[0].SyncCopyToCPU(softmax.data(), input_dim);
    exe->outputs[1].CopyTo(&args_map["LSTM_init_h"]);
    exe->outputs[2].CopyTo(&args_map["LSTM_init_c"]);

    return std::max_element(softmax.begin(), softmax.end()) - softmax.begin();
  };

  // Initialised so that an empty prompt no longer feeds an indeterminate
  // value into the network in the generation loop below.
  mx_float index = 0;
  wchar_t next = 0;
  bool primed = false;
  for (auto c : *ptext) {
    const size_t n = step(&dictionary[c]);
    index = static_cast<mx_float>(n);
    next = charIndices[n];
    primed = true;
  }
  // With an empty prompt there is no prediction yet, so nothing is appended
  // here (previously a NUL character was pushed in that case).
  if (primed) {
    ptext->push_back(next);
  }

  for (int i = 0; i < sequence_length; i++) {
    const size_t n = step(&index);
    index = static_cast<mx_float>(n);
    next = charIndices[n];
    ptext->push_back(next);
  }

  delete exe;
}

/*!
 * Command-line entry point of the charRNN example.
 *
 * argv[1] selects the task: it must start with "train" or "predict" and may
 * additionally contain "BuiltIn" (use the built-in RNN operator variants) and
 * "TimeMajor" (sets the file-level TIME_MAJOR flag). Fewer than four
 * arguments after the program name prints the usage text and returns 0.
 */
int main(int argc, char** argv) {
  if (argc < 5) {
    std::cout << "Usage for training: charRNN train[BuiltIn][TimeMajor] {corpus file}"
                 " {batch size} {max epoch} [{starting epoch}]" << std::endl;
    std::cout <<"Usage for prediction: charRNN predict[BuiltIn][TimeMajor] {params file}"
                " {dictionary file} {beginning of text}" << std::endl;
    std::cout <<"Note: The {params file} of train/trainBuiltIn/trainTimeMajor/trainBuiltInTimeMajor"
                " are not compatible with each other." << std::endl;
    return 0;
  }

  std::string task = argv[1];
  bool builtIn = task.find("BuiltIn") != std::string::npos;
  TIME_MAJOR = task.find("TimeMajor") != std::string::npos;
  std::cout << "use BuiltIn cuDNN RNN: " << builtIn << std::endl
            << "use data as TimeMajor: " << TIME_MAJOR << std::endl;
  TRY
  if (task.find("train") == 0) {
    std::cout << "train batch size:      " << argv[3] << std::endl
              << "train max epoch:       " << argv[4] << std::endl;
    int start_epoch = argc > 5? atoi(argv[5]) : -1;
    // Training generates both the dictionary file and the params file.
    if (builtIn)
      trainWithBuiltInRNNOp(argv[2], atoi(argv[3]), atoi(argv[4]), start_epoch);
    else
      train(argv[2], atoi(argv[3]), atoi(argv[4]), start_epoch);  // ditto
  } else if (task.find("predict") == 0) {
    std::wstring text;  // = L"If there is anyone out there who still doubts ";
    // wchar_t is used instead of char so the example can later be extended
    // to non-ASCII (e.g. Chinese) samples.
    for (char c : std::string(argv[4]))
      text.push_back(static_cast<wchar_t>(c));
    /* The Python version samples the next character at random by default.
       This example has no sampling code and always picks the 'best'
       character, so the generated length is reduced to 600: longer output
       tends to repeat sentences, since the training sequence length is only
       129 for the obama corpus. */
    if (builtIn)
      predictWithBuiltInRNNOp(&text, 600, argv[2], argv[3]);
    else
      predict(&text, 600, argv[2], argv[3]);
    std::wcout << text << std::endl;
  }

  MXNotifyShutdown();
  CATCH
  return 0;
}
diff --git a/cpp-package/example/feature_extract/README.md b/cpp-package/example/feature_extract/README.md new file mode 100644 index 000000000000..0b94bef7705f --- /dev/null +++ b/cpp-package/example/feature_extract/README.md @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + +This example shows how to extract features with a pretrained model. + +Execute `run.sh` to: +- Download a pretrained model +- Download sample pictures (`dog.jpg` and `cat.jpg`) +- Compile the files +- Execute the featurization on `dog.jpg` and `cat.jpg` + + +Note: +1. The filename of network parameters may vary, line 67 in `feature_extract.cpp` should be updated accordingly. +2. You need to build MXNet from source to get access to the `lib/libmxnet.so` or point `LD_LIBRARY_PATH` to where it is installed in your system diff --git a/cpp-package/example/feature_extract/feature_extract.cpp b/cpp-package/example/feature_extract/feature_extract.cpp new file mode 100644 index 000000000000..d614fd576238 --- /dev/null +++ b/cpp-package/example/feature_extract/feature_extract.cpp @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + */ +#include +#include +#include +#include +#include +#include "mxnet-cpp/MxNetCpp.h" +using namespace std; +using namespace mxnet::cpp; + +/* + * This example shows how to extract features with a pretrained model. + * Get the model here: + * https://github.com/dmlc/mxnet-model-gallery + * */ + +/*The global context, change them if necessary*/ +Context global_ctx(kGPU, 0); +// Context global_ctx(kCPU,0); + +class FeatureExtractor { + private: + /*the mean image, get from the pretrained model*/ + NDArray mean_img; + /*the following two maps store all the paramters need by the model*/ + map args_map; + map aux_map; + Symbol net; + Executor *executor; + /*Get the feature layer we want to extract*/ + void GetFeatureSymbol() { + /* + * use the following to check all the layers' names: + * */ + /* + net=Symbol::Load("./model/Inception_BN-symbol.json").GetInternals(); + for(const auto & layer_name:net.ListOutputs()){ + LG< paramters; + NDArray::Load("./model/Inception-BN-0126.params", 0, ¶mters); + for (const auto &k : paramters) { + if (k.first.substr(0, 4) == "aux:") { + auto name = k.first.substr(4, k.first.size() - 4); + aux_map[name] = k.second.Copy(global_ctx); + } + if (k.first.substr(0, 4) == "arg:") { + auto name = k.first.substr(4, k.first.size() - 4); + args_map[name] = k.second.Copy(global_ctx); + } + } + /*WaitAll is need when we copy data between GPU and the main memory*/ + NDArray::WaitAll(); + } + void GetMeanImg() { + mean_img = NDArray(Shape(1, 3, 224, 224), global_ctx, false); + mean_img.SyncCopyFromCPU( + 
NDArray::LoadToMap("./model/mean_224.nd")["mean_img"].GetData(), + 1 * 3 * 224 * 224); + NDArray::WaitAll(); + } + + public: + FeatureExtractor() { + /*prepare the model, fill the pretrained parameters, get the mean image*/ + GetFeatureSymbol(); + LoadParameters(); + GetMeanImg(); + } + + void Extract(NDArray data) { + /*Normalize the pictures*/ + data.Slice(0, 1) -= mean_img; + data.Slice(1, 2) -= mean_img; + args_map["data"] = data; + /*bind the executor*/ + executor = net.SimpleBind(global_ctx, args_map, map(), + map(), aux_map); + executor->Forward(false); + /*print out the features*/ + auto array = executor->outputs[0].Copy(Context(kCPU, 0)); + NDArray::WaitAll(); + array = array.Reshape({2, 1024}); + for (int i = 0; i < 1024; ++i) { + cout << array.At(0, i) << ","; + } + cout << endl; + } +}; + +NDArray Data2NDArray() { + NDArray ret(Shape(2, 3, 224, 224), global_ctx, false); + ifstream inf("./img.dat", ios::binary); + vector data(2 * 3 * 224 * 224); + inf.read(reinterpret_cast(data.data()), 2 * 3 * 224 * 224 * sizeof(float)); + inf.close(); + ret.SyncCopyFromCPU(data.data(), 2 * 3 * 224 * 224); + NDArray::WaitAll(); + return ret; +} + +int main() { + /* + * get the data from a binary file ./img.data + * this file is generated by ./prepare_data_with_opencv + * it stores 2 pictures in NDArray format + * + */ + auto data = Data2NDArray(); + FeatureExtractor fe; + fe.Extract(data); + return 0; +} diff --git a/cpp-package/example/feature_extract/prepare_data_with_opencv.cpp b/cpp-package/example/feature_extract/prepare_data_with_opencv.cpp new file mode 100644 index 000000000000..fe32e896adb1 --- /dev/null +++ b/cpp-package/example/feature_extract/prepare_data_with_opencv.cpp @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + */ +#include +#include +#include +#include +#include + +using namespace std; + +/*read images and store them the NDArray format that MXNet.cpp can handle*/ +void Mat2Array() { + string file_name_list[] = {"./dog.jpg", "./cat.jpg"}; + + std::vector array; + for (auto &t : file_name_list) { + cv::Mat mat = cv::imread(t); + /*resize pictures to (224, 224) according to the pretrained model*/ + cv::resize(mat, mat, cv::Size(224, 224)); + for (int c = 0; c < 3; ++c) { + for (int i = 0; i < 224; ++i) { + for (int j = 0; j < 224; ++j) { + array.push_back(static_cast(mat.data[(i * 224 + j) * 3 + c])); + } + } + } + } + ofstream outf("./img.dat", ios::binary); + outf.write(reinterpret_cast(array.data()), array.size() * sizeof(float)); + outf.close(); +} + +int main(int argc, char *argv[]) { + Mat2Array(); + return 0; +} diff --git a/cpp-package/example/feature_extract/run.sh b/cpp-package/example/feature_extract/run.sh new file mode 100755 index 000000000000..b98ddb9eb81e --- /dev/null +++ b/cpp-package/example/feature_extract/run.sh @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +
# Downloading the data and model
# (-nc: skip the download when the target file already exists)
mkdir -p model
wget -nc -O model/Inception-BN-symbol.json \
    http://data.mxnet.io/mxnet/models/imagenet/inception-bn/Inception-BN-symbol.json
wget -nc -O model/synset.txt \
    http://data.mxnet.io/mxnet/models/imagenet/synset.txt
wget -nc -O model/Inception-BN-0126.params \
    http://data.mxnet.io/mxnet/models/imagenet/inception-bn/Inception-BN-0126.params?raw=true
wget -nc -O cat.jpg https://github.com/dmlc/web-data/blob/master/mxnet/doc/tutorials/python/predict_image/cat.jpg?raw=true
wget -nc -O dog.jpg https://github.com/dmlc/web-data/blob/master/mxnet/doc/tutorials/python/predict_image/dog.jpg?raw=true
wget -nc -O model/mean_224.nd https://github.com/dmlc/web-data/raw/master/mxnet/example/feature_extract/mean_224.nd
# This script never downloads inception-bn.tar.gz; only unpack it when the
# user has placed the archive here, instead of always printing a tar error.
if [ -f inception-bn.tar.gz ]; then
    tar -xvzf inception-bn.tar.gz -C model --skip-old-files
fi

# Building
make

# Preparing the data
./prepare_data_with_opencv

# Running the featurization
LD_LIBRARY_PATH=../../../lib ./feature_extract
diff --git a/cpp-package/example/get_data.sh b/cpp-package/example/get_data.sh new file mode 100755 index 000000000000..fda69ce2f087 --- /dev/null +++ b/cpp-package/example/get_data.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +
set -e

mkdir -p data/mnist_data
cd data/mnist_data

# download URL
#
# Fetch a gzip-compressed file into the current directory and decompress it.
# The local name is the last path component of URL; the decompressed name is
# that with its final extension removed. Nothing is done when the decompressed
# file already exists. Any download or decompression failure terminates the
# script with status 1.
download () {
    local URL=$1
    local GZ_FILE_NAME="${URL##*/}"
    local FILE_NAME="${GZ_FILE_NAME%.*}"

    if [[ -f "${FILE_NAME}" ]]; then
        echo "File ${FILE_NAME} already downloaded."
        return 0
    fi

    echo "Downloading ${URL} ..."
    # --retry-delay was given twice (10, then 0); curl honours the last
    # occurrence, so only the effective value 0 is kept.
    # --fail turns HTTP errors into a non-zero exit status instead of saving
    # the error page as the .gz file; --show-error keeps the reason visible
    # despite --silent.
    local CURL_OPTIONS=(
        --connect-timeout 10
        --max-time 300
        --retry 3
        --retry-delay 0
        --location
        --silent
        --show-error
        --fail
    )
    if ! curl "${CURL_OPTIONS[@]}" "${URL}" -o "${GZ_FILE_NAME}" \
            || [[ ! -f "${GZ_FILE_NAME}" ]]; then
        echo "File ${URL} couldn't be downloaded!"
        exit 1
    fi

    # Explicit check: the former `(($? != 0))` test after gzip could never
    # see a failure because `set -e` aborts the script first.
    if ! gzip -d "${GZ_FILE_NAME}"; then
        exit 1
    fi
    return 0
}

# MNIST dataset from: http://yann.lecun.com/exdb/mnist/
FILES=(
    "https://web.archive.org/web/20160828233817/http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz"
    "https://web.archive.org/web/20160828233817/http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz"
    "https://web.archive.org/web/20160828233817/http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz"
    "https://web.archive.org/web/20160828233817/http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz"
    "http://data.mxnet.io/data/mnist_train.csv.gz")

for FILE in "${FILES[@]}"; do
    download "${FILE}"
done
diff --git a/cpp-package/example/googlenet.cpp b/cpp-package/example/googlenet.cpp new file mode 100644 index 000000000000..c14ef5fd1dc1 --- /dev/null +++ b/cpp-package/example/googlenet.cpp @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*!
+ */ +#include +#include +#include +#include +#include "utils.h" +#include "mxnet-cpp/MxNetCpp.h" + +using namespace mxnet::cpp; + +Symbol ConvFactory(Symbol data, int num_filter, + Shape kernel, + Shape stride = Shape(1, 1), + Shape pad = Shape(0, 0), + const std::string & name = "", + const std::string & suffix = "") { + Symbol conv_w("conv_" + name + suffix + "_w"), conv_b("conv_" + name + suffix + "_b"); + + Symbol conv = Convolution("conv_" + name + suffix, data, + conv_w, conv_b, kernel, + num_filter, stride, Shape(1, 1), pad); + return Activation("relu_" + name + suffix, conv, "relu"); +} + +Symbol InceptionFactory(Symbol data, int num_1x1, int num_3x3red, + int num_3x3, int num_d5x5red, int num_d5x5, + PoolingPoolType pool, int proj, const std::string & name) { + Symbol c1x1 = ConvFactory(data, num_1x1, Shape(1, 1), + Shape(1, 1), Shape(0, 0), name + "_1x1"); + + Symbol c3x3r = ConvFactory(data, num_3x3red, Shape(1, 1), + Shape(1, 1), Shape(0, 0), name + "_3x3", "_reduce"); + + Symbol c3x3 = ConvFactory(c3x3r, num_3x3, Shape(3, 3), + Shape(1, 1), Shape(1, 1), name + "_3x3"); + + Symbol cd5x5r = ConvFactory(data, num_d5x5red, Shape(1, 1), + Shape(1, 1), Shape(0, 0), name + "_5x5", "_reduce"); + + Symbol cd5x5 = ConvFactory(cd5x5r, num_d5x5, Shape(5, 5), + Shape(1, 1), Shape(2, 2), name + "_5x5"); + + Symbol pooling = Pooling(name + "_pool", data, Shape(3, 3), pool, + false, false, PoolingPoolingConvention::kValid, + Shape(1, 1), Shape(1, 1)); + + Symbol cproj = ConvFactory(pooling, proj, Shape(1, 1), + Shape(1, 1), Shape(0, 0), name + "_proj"); + + std::vector lst; + lst.push_back(c1x1); + lst.push_back(c3x3); + lst.push_back(cd5x5); + lst.push_back(cproj); + return Concat("ch_concat_" + name + "_chconcat", lst, lst.size()); +} + +Symbol GoogleNetSymbol(int num_classes) { + // data and label + Symbol data = Symbol::Variable("data"); + Symbol data_label = Symbol::Variable("data_label"); + + Symbol conv1 = ConvFactory(data, 64, Shape(7, 7), Shape(2, 2), 
Shape(3, 3), "conv1"); + Symbol pool1 = Pooling("pool1", conv1, Shape(3, 3), PoolingPoolType::kMax, + false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); + Symbol conv2 = ConvFactory(pool1, 64, Shape(1, 1), Shape(1, 1), + Shape(0, 0), "conv2"); + Symbol conv3 = ConvFactory(conv2, 192, Shape(3, 3), Shape(1, 1), Shape(1, 1), "conv3"); + Symbol pool3 = Pooling("pool3", conv3, Shape(3, 3), PoolingPoolType::kMax, + false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); + + Symbol in3a = InceptionFactory(pool3, 64, 96, 128, 16, 32, PoolingPoolType::kMax, 32, "in3a"); + Symbol in3b = InceptionFactory(in3a, 128, 128, 192, 32, 96, PoolingPoolType::kMax, 64, "in3b"); + Symbol pool4 = Pooling("pool4", in3b, Shape(3, 3), PoolingPoolType::kMax, + false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); + Symbol in4a = InceptionFactory(pool4, 192, 96, 208, 16, 48, PoolingPoolType::kMax, 64, "in4a"); + Symbol in4b = InceptionFactory(in4a, 160, 112, 224, 24, 64, PoolingPoolType::kMax, 64, "in4b"); + Symbol in4c = InceptionFactory(in4b, 128, 128, 256, 24, 64, PoolingPoolType::kMax, 64, "in4c"); + Symbol in4d = InceptionFactory(in4c, 112, 144, 288, 32, 64, PoolingPoolType::kMax, 64, "in4d"); + Symbol in4e = InceptionFactory(in4d, 256, 160, 320, 32, 128, PoolingPoolType::kMax, 128, "in4e"); + Symbol pool5 = Pooling("pool5", in4e, Shape(3, 3), PoolingPoolType::kMax, + false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); + Symbol in5a = InceptionFactory(pool5, 256, 160, 320, 32, 128, PoolingPoolType::kMax, 128, "in5a"); + Symbol in5b = InceptionFactory(in5a, 384, 192, 384, 48, 128, PoolingPoolType::kMax, 128, "in5b"); + Symbol pool6 = Pooling("pool6", in5b, Shape(7, 7), PoolingPoolType::kAvg, + false, false, PoolingPoolingConvention::kValid, Shape(1, 1)); + + Symbol flatten = Flatten("flatten", pool6); + + Symbol fc1_w("fc1_w"), fc1_b("fc1_b"); + Symbol fc1 = FullyConnected("fc1", flatten, fc1_w, fc1_b, num_classes); + + return SoftmaxOutput("softmax", 
fc1, data_label); +} + +int main(int argc, char const *argv[]) { + int batch_size = 50; + int max_epoch = argc > 1 ? strtol(argv[1], nullptr, 10) : 100; + float learning_rate = 1e-4; + float weight_decay = 1e-4; + + auto ctx = Context::gpu(); +#if !MXNET_USE_CUDA + ctx = Context::cpu();; +#endif + + TRY + auto googlenet = GoogleNetSymbol(10); + std::map args_map; + std::map aux_map; + + args_map["data"] = NDArray(Shape(batch_size, 3, 256, 256), ctx); + args_map["data_label"] = NDArray(Shape(batch_size), ctx); + googlenet.InferArgsMap(ctx, &args_map, args_map); + + std::vector data_files = { "./data/mnist_data/train-images-idx3-ubyte", + "./data/mnist_data/train-labels-idx1-ubyte", + "./data/mnist_data/t10k-images-idx3-ubyte", + "./data/mnist_data/t10k-labels-idx1-ubyte" + }; + + auto train_iter = MXDataIter("MNISTIter"); + if (!setDataIter(&train_iter, "Train", data_files, batch_size)) { + return 1; + } + + auto val_iter = MXDataIter("MNISTIter"); + if (!setDataIter(&val_iter, "Label", data_files, batch_size)) { + return 1; + } + + Optimizer* opt = OptimizerRegistry::Find("sgd"); + opt->SetParam("momentum", 0.9) + ->SetParam("rescale_grad", 1.0 / batch_size) + ->SetParam("clip_gradient", 10) + ->SetParam("lr", learning_rate) + ->SetParam("wd", weight_decay); + + + auto *exec = googlenet.SimpleBind(ctx, args_map); + auto arg_names = googlenet.ListArguments(); + + for (int iter = 0; iter < max_epoch; ++iter) { + LG << "Epoch: " << iter; + train_iter.Reset(); + while (train_iter.Next()) { + auto data_batch = train_iter.GetDataBatch(); + data_batch.data.CopyTo(&args_map["data"]); + data_batch.label.CopyTo(&args_map["data_label"]); + NDArray::WaitAll(); + exec->Forward(true); + exec->Backward(); + for (size_t i = 0; i < arg_names.size(); ++i) { + if (arg_names[i] == "data" || arg_names[i] == "data_label") continue; + opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]); + } + } + + Accuracy acu; + val_iter.Reset(); + while (val_iter.Next()) { + auto data_batch = 
val_iter.GetDataBatch(); + data_batch.data.CopyTo(&args_map["data"]); + data_batch.label.CopyTo(&args_map["data_label"]); + NDArray::WaitAll(); + exec->Forward(false); + NDArray::WaitAll(); + acu.Update(data_batch.label, exec->outputs[0]); + } + LG << "Accuracy: " << acu.Get(); + } + + delete exec; + delete opt; + MXNotifyShutdown(); + CATCH + return 0; +} diff --git a/cpp-package/example/inception_bn.cpp b/cpp-package/example/inception_bn.cpp new file mode 100644 index 000000000000..b7cc64a3317a --- /dev/null +++ b/cpp-package/example/inception_bn.cpp @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ */ +#include +#include +#include +#include +#include "utils.h" +#include "mxnet-cpp/MxNetCpp.h" + +using namespace mxnet::cpp; + +Symbol ConvFactoryBN(Symbol data, int num_filter, + Shape kernel, Shape stride, Shape pad, + const std::string & name, + const std::string & suffix = "") { + Symbol conv_w("conv_" + name + suffix + "_w"), conv_b("conv_" + name + suffix + "_b"); + + Symbol conv = Convolution("conv_" + name + suffix, data, + conv_w, conv_b, kernel, + num_filter, stride, Shape(1, 1), pad); + std::string name_suffix = name + suffix; + Symbol gamma(name_suffix + "_gamma"); + Symbol beta(name_suffix + "_beta"); + Symbol mmean(name_suffix + "_mmean"); + Symbol mvar(name_suffix + "_mvar"); + Symbol bn = BatchNorm("bn_" + name + suffix, conv, gamma, beta, mmean, mvar); + return Activation("relu_" + name + suffix, bn, "relu"); +} + +Symbol InceptionFactoryA(Symbol data, int num_1x1, int num_3x3red, + int num_3x3, int num_d3x3red, int num_d3x3, + PoolingPoolType pool, int proj, + const std::string & name) { + Symbol c1x1 = ConvFactoryBN(data, num_1x1, Shape(1, 1), Shape(1, 1), + Shape(0, 0), name + "1x1"); + Symbol c3x3r = ConvFactoryBN(data, num_3x3red, Shape(1, 1), Shape(1, 1), + Shape(0, 0), name + "_3x3r"); + Symbol c3x3 = ConvFactoryBN(c3x3r, num_3x3, Shape(3, 3), Shape(1, 1), + Shape(1, 1), name + "_3x3"); + Symbol cd3x3r = ConvFactoryBN(data, num_d3x3red, Shape(1, 1), Shape(1, 1), + Shape(0, 0), name + "_double_3x3", "_reduce"); + Symbol cd3x3 = ConvFactoryBN(cd3x3r, num_d3x3, Shape(3, 3), Shape(1, 1), + Shape(1, 1), name + "_double_3x3_0"); + cd3x3 = ConvFactoryBN(data = cd3x3, num_d3x3, Shape(3, 3), Shape(1, 1), + Shape(1, 1), name + "_double_3x3_1"); + Symbol pooling = Pooling(name + "_pool", data, + Shape(3, 3), pool, false, false, + PoolingPoolingConvention::kValid, + Shape(1, 1), Shape(1, 1)); + Symbol cproj = ConvFactoryBN(pooling, proj, Shape(1, 1), Shape(1, 1), + Shape(0, 0), name + "_proj"); + std::vector lst; + lst.push_back(c1x1); + 
lst.push_back(c3x3); + lst.push_back(cd3x3); + lst.push_back(cproj); + return Concat("ch_concat_" + name + "_chconcat", lst, lst.size()); +} + +Symbol InceptionFactoryB(Symbol data, int num_3x3red, int num_3x3, + int num_d3x3red, int num_d3x3, const std::string & name) { + Symbol c3x3r = ConvFactoryBN(data, num_3x3red, Shape(1, 1), + Shape(1, 1), Shape(0, 0), + name + "_3x3", "_reduce"); + Symbol c3x3 = ConvFactoryBN(c3x3r, num_3x3, Shape(3, 3), Shape(2, 2), + Shape(1, 1), name + "_3x3"); + Symbol cd3x3r = ConvFactoryBN(data, num_d3x3red, Shape(1, 1), Shape(1, 1), + Shape(0, 0), name + "_double_3x3", "_reduce"); + Symbol cd3x3 = ConvFactoryBN(cd3x3r, num_d3x3, Shape(3, 3), Shape(1, 1), + Shape(1, 1), name + "_double_3x3_0"); + cd3x3 = ConvFactoryBN(cd3x3, num_d3x3, Shape(3, 3), Shape(2, 2), + Shape(1, 1), name + "_double_3x3_1"); + Symbol pooling = Pooling("max_pool_" + name + "_pool", data, + Shape(3, 3), PoolingPoolType::kMax, + false, false, PoolingPoolingConvention::kValid, + Shape(2, 2), Shape(1, 1)); + std::vector lst; + lst.push_back(c3x3); + lst.push_back(cd3x3); + lst.push_back(pooling); + return Concat("ch_concat_" + name + "_chconcat", lst, lst.size()); +} + +Symbol InceptionSymbol(int num_classes) { + // data and label + Symbol data = Symbol::Variable("data"); + Symbol data_label = Symbol::Variable("data_label"); + + // stage 1 + Symbol conv1 = ConvFactoryBN(data, 64, Shape(7, 7), Shape(2, 2), Shape(3, 3), "conv1"); + Symbol pool1 = Pooling("pool1", conv1, Shape(3, 3), PoolingPoolType::kMax, + false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); + + // stage 2 + Symbol conv2red = ConvFactoryBN(pool1, 64, Shape(1, 1), Shape(1, 1), Shape(0, 0), "conv2red"); + Symbol conv2 = ConvFactoryBN(conv2red, 192, Shape(3, 3), Shape(1, 1), Shape(1, 1), "conv2"); + Symbol pool2 = Pooling("pool2", conv2, Shape(3, 3), PoolingPoolType::kMax, + false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); + + // stage 3 + Symbol in3a = InceptionFactoryA(pool2, 
64, 64, 64, 64, 96, PoolingPoolType::kAvg, 32, "3a"); + Symbol in3b = InceptionFactoryA(in3a, 64, 64, 96, 64, 96, PoolingPoolType::kAvg, 64, "3b"); + Symbol in3c = InceptionFactoryB(in3b, 128, 160, 64, 96, "3c"); + + // stage 4 + Symbol in4a = InceptionFactoryA(in3c, 224, 64, 96, 96, 128, PoolingPoolType::kAvg, 128, "4a"); + Symbol in4b = InceptionFactoryA(in4a, 192, 96, 128, 96, 128, PoolingPoolType::kAvg, 128, "4b"); + Symbol in4c = InceptionFactoryA(in4b, 160, 128, 160, 128, 160, PoolingPoolType::kAvg, 128, "4c"); + Symbol in4d = InceptionFactoryA(in4c, 96, 128, 192, 160, 192, PoolingPoolType::kAvg, 128, "4d"); + Symbol in4e = InceptionFactoryB(in4d, 128, 192, 192, 256, "4e"); + + // stage 5 + Symbol in5a = InceptionFactoryA(in4e, 352, 192, 320, 160, 224, PoolingPoolType::kAvg, 128, "5a"); + Symbol in5b = InceptionFactoryA(in5a, 352, 192, 320, 192, 224, PoolingPoolType::kMax, 128, "5b"); + + // average pooling + Symbol avg = Pooling("global_pool", in5b, Shape(7, 7), PoolingPoolType::kAvg); + + // classifier + Symbol flatten = Flatten("flatten", avg); + Symbol conv1_w("conv1_w"), conv1_b("conv1_b"); + Symbol fc1 = FullyConnected("fc1", flatten, conv1_w, conv1_b, num_classes); + return SoftmaxOutput("softmax", fc1, data_label); +} + +NDArray ResizeInput(NDArray data, const Shape new_shape) { + NDArray pic = data.Reshape(Shape(0, 1, 28, 28)); + NDArray pic_1channel; + Operator("_contrib_BilinearResize2D") + .SetParam("height", new_shape[2]) + .SetParam("width", new_shape[3]) + (pic).Invoke(pic_1channel); + NDArray output; + Operator("tile") + .SetParam("reps", Shape(1, 3, 1, 1)) + (pic_1channel).Invoke(output); + return output; +} + +int main(int argc, char const *argv[]) { + int batch_size = 40; + int max_epoch = argc > 1 ? 
strtol(argv[1], nullptr, 10) : 100; + float learning_rate = 1e-2; + float weight_decay = 1e-4; + + /*context*/ + auto ctx = Context::cpu(); + int num_gpu; + MXGetGPUCount(&num_gpu); +#if MXNET_USE_CUDA + if (num_gpu > 0) { + ctx = Context::gpu(); + } +#endif + + TRY + auto inception_bn_net = InceptionSymbol(10); + std::map args_map; + std::map aux_map; + + const Shape data_shape = Shape(batch_size, 3, 224, 224), + label_shape = Shape(batch_size); + args_map["data"] = NDArray(data_shape, ctx); + args_map["data_label"] = NDArray(label_shape, ctx); + inception_bn_net.InferArgsMap(ctx, &args_map, args_map); + + std::vector data_files = { "./data/mnist_data/train-images-idx3-ubyte", + "./data/mnist_data/train-labels-idx1-ubyte", + "./data/mnist_data/t10k-images-idx3-ubyte", + "./data/mnist_data/t10k-labels-idx1-ubyte" + }; + + auto train_iter = MXDataIter("MNISTIter"); + if (!setDataIter(&train_iter, "Train", data_files, batch_size)) { + return 1; + } + + auto val_iter = MXDataIter("MNISTIter"); + if (!setDataIter(&val_iter, "Label", data_files, batch_size)) { + return 1; + } + + // initialize parameters + auto initializer = Uniform(0.07); + for (auto& arg : args_map) { + initializer(arg.first, &arg.second); + } + + Optimizer* opt = OptimizerRegistry::Find("sgd"); + opt->SetParam("momentum", 0.9) + ->SetParam("rescale_grad", 1.0 / batch_size) + ->SetParam("clip_gradient", 10) + ->SetParam("lr", learning_rate) + ->SetParam("wd", weight_decay); + + auto *exec = inception_bn_net.SimpleBind(ctx, args_map); + auto arg_names = inception_bn_net.ListArguments(); + + // Create metrics + Accuracy train_acc, val_acc; + for (int iter = 0; iter < max_epoch; ++iter) { + LG << "Epoch: " << iter; + train_iter.Reset(); + train_acc.Reset(); + while (train_iter.Next()) { + auto data_batch = train_iter.GetDataBatch(); + ResizeInput(data_batch.data, data_shape).CopyTo(&args_map["data"]); + data_batch.label.CopyTo(&args_map["data_label"]); + NDArray::WaitAll(); + + exec->Forward(true); + 
exec->Backward(); + // Update parameters + for (size_t i = 0; i < arg_names.size(); ++i) { + if (arg_names[i] == "data" || arg_names[i] == "data_label") continue; + opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]); + } + + NDArray::WaitAll(); + train_acc.Update(data_batch.label, exec->outputs[0]); + } + + val_iter.Reset(); + val_acc.Reset(); + while (val_iter.Next()) { + auto data_batch = val_iter.GetDataBatch(); + ResizeInput(data_batch.data, data_shape).CopyTo(&args_map["data"]); + data_batch.label.CopyTo(&args_map["data_label"]); + NDArray::WaitAll(); + exec->Forward(false); + NDArray::WaitAll(); + val_acc.Update(data_batch.label, exec->outputs[0]); + } + LG << "Train Accuracy: " << train_acc.Get(); + LG << "Validation Accuracy: " << val_acc.Get(); + } + delete exec; + delete opt; + MXNotifyShutdown(); + CATCH + return 0; +} diff --git a/cpp-package/example/inference/CMakeLists.txt b/cpp-package/example/inference/CMakeLists.txt new file mode 100644 index 000000000000..0566d28a57df --- /dev/null +++ b/cpp-package/example/inference/CMakeLists.txt @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# Explicitly set GENERATED property https://gitlab.kitware.com/cmake/cmake/issues/18399 +set_property(SOURCE ${CMAKE_CURRENT_LIST_DIR}/../../include/mxnet-cpp/op.h PROPERTY GENERATED 1) + +add_executable(imagenet_inference "imagenet_inference.cpp") +target_link_libraries(imagenet_inference mxnet_cpp) diff --git a/cpp-package/example/inference/README.md b/cpp-package/example/inference/README.md new file mode 100644 index 000000000000..8dc094f71693 --- /dev/null +++ b/cpp-package/example/inference/README.md @@ -0,0 +1,208 @@ + + + + + + + + + + + + + + + + + +# MXNet C++ Package Inference Workflow Examples + +## Building C++ Inference examples + +The examples in this folder demonstrate the **inference** workflow. Please build the MXNet C++ Package as explained in the [README]() File. You can get the executable files by just copying them from ```incubator-mxnet/build/cpp-package/example``` + +## Examples demonstrating inference workflow + +This directory contains following examples. In order to run the examples, ensure that the path to the MXNet shared library is added to the OS specific environment variable viz. **LD\_LIBRARY\_PATH** for Linux, Mac and Ubuntu OS and **PATH** for Windows OS. + +## [imagenet_inference.cpp]() + +This example demonstrates image classification workflow with pre-trained models using MXNet C++ API. Now this script also supports inference with quantized CNN models generated by Intel® MKL-DNN (see this [quantization flow](https://github.com/apache/incubator-mxnet/blob/master/example/quantization/README.md)). By using C++ API, the latency of most models will be reduced to some extent compared with current Python implementation. + +Most of CNN models have been tested on Linux systems. And 50000 images are used to collect accuracy numbers. Please refer to this [README](https://github.com/apache/incubator-mxnet/blob/master/example/quantization/README.md) for more details about accuracy. 
+ +The following performance numbers were collected using the C++ inference API on AWS EC2 C5.12xlarge. The environment variables are set as follows: + +``` +export KMP_AFFINITY=granularity=fine,noduplicates,compact,1,0 +export OMP_NUM_THREADS=$(vCPUs/2) +export MXNET_ENGINE_TYPE=NaiveEngine +``` +Users are also advised to use ```numactl``` or ```taskset``` to bind a running process to the specified cores. + +| Model | Dataset |BS=1 (imgs/sec) |BS=64 (imgs/sec) | +|:---|:---|:---:|:---:| +| | |FP32 / INT8 | FP32 / INT8 | +| ResNet18-V1 | [Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec) |369.00 / 778.82|799.7 / 2598.04| +| ResNet50-V1 | [Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec) |160.72 / 405.84|349.73 / 1297.65 | +| ResNet101-V1 | [Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec) | 89.56 / 197.55| 193.25 / 740.47| +|Squeezenet 1.0|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec) | 294.46 / 899.28| 857.70 / 3065.13| +|MobileNet 1.0|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec) |554.94 / 676.59|1279.44 / 3393.43| +|MobileNetV2 1.0|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec) |303.40 / 776.40|994.25 / 4227.77| +|Inception V3|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec) |108.20 / 219.20 | 232.22 / 870.09 | +|ResNet152-V2|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec) |52.28 / 64.62|107.03 / 134.04 | +|Inception-BN|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec) | 211.86 / 306.37| 632.79 / 2115.28| + +The command-line options accepted by this script are shown below: +``` +./imagenet_inference --help +Usage: +imagenet_inference --symbol_file + --params_file + --dataset + --data_nthreads + --input_shape ] + --rgb_mean + --rgb_std + --batch_size + --num_skipped_batches + --num_inference_batches + --data_layer_type + --gpu + --enableTRT " + --benchmark +``` + +Follow the below steps to do inference with 
more models. + +- Download the pre-trained FP32 models into the ```./model``` directory. +- Refer to this [README](https://github.com/apache/incubator-mxnet/blob/master/example/quantization/README.md) to generate the corresponding quantized models and also put them into the ```./model``` directory. +- Prepare the [validation dataset](http://data.mxnet.io/data/val_256_q90.rec) and put it into the ```./data``` directory. + +The command lines below show how to run inference with the FP32/INT8 resnet50_v1 models. The C++ inference script provides almost the same command line as this [Python script](https://github.com/apache/incubator-mxnet/blob/master/example/quantization/imagenet_inference.py), so users can easily move from Python to C++. +``` + +# FP32 inference +./imagenet_inference --symbol_file "./model/resnet50_v1-symbol.json" --params_file "./model/resnet50_v1-0000.params" --dataset "./data/val_256_q90.rec" --rgb_mean "123.68 116.779 103.939" --rgb_std "58.393 57.12 57.375" --batch_size 64 --num_skipped_batches 50 --num_inference_batches 500 + +# INT8 inference +./imagenet_inference --symbol_file "./model/resnet50_v1-quantized-5batches-naive-symbol.json" --params_file "./model/resnet50_v1-quantized-0000.params" --dataset "./data/val_256_q90.rec" --rgb_mean "123.68 116.779 103.939" --rgb_std "58.393 57.12 57.375" --batch_size 64 --num_skipped_batches 50 --num_inference_batches 500 + +# FP32 dummy data +./imagenet_inference --symbol_file "./model/resnet50_v1-symbol.json" --batch_size 64 --num_inference_batches 500 --benchmark + +# INT8 dummy data +./imagenet_inference --symbol_file "./model/resnet50_v1-quantized-5batches-naive-symbol.json" --batch_size 64 --num_inference_batches 500 --benchmark + +``` +For a quick inference test, users can directly run [unit_test_imagenet_inference.sh]() using the command below. This script will automatically download the pre-trained **Inception-Bn** and **resnet50_v1_int8** models and the **validation dataset** that are required for inference. 
+ +``` +./unit_test_imagenet_inference.sh +``` +You should see output similar to the following: +``` +>>> INFO: FP32 real data +imagenet_inference.cpp:282: Loading the model from ./model/Inception-BN-symbol.json +imagenet_inference.cpp:295: Loading the model parameters from ./model/Inception-BN-0126.params +imagenet_inference.cpp:443: INFO:Dataset for inference: ./data/val_256_q90.rec +imagenet_inference.cpp:444: INFO:label_name = softmax_label +imagenet_inference.cpp:445: INFO:rgb_mean: (123.68, 116.779, 103.939) +imagenet_inference.cpp:447: INFO:rgb_std: (1, 1, 1) +imagenet_inference.cpp:449: INFO:Image shape: (3, 224, 224) +imagenet_inference.cpp:451: INFO:Finished inference with: 500 images +imagenet_inference.cpp:453: INFO:Batch size = 1 for inference +imagenet_inference.cpp:454: INFO:Accuracy: 0.744 +imagenet_inference.cpp:455: INFO:Throughput: xxxx images per second + +>>> INFO: FP32 dummy data +imagenet_inference.cpp:282: Loading the model from ./model/Inception-BN-symbol.json +imagenet_inference.cpp:372: Running the forward pass on model to evaluate the performance.. +imagenet_inference.cpp:387: benchmark completed! +imagenet_inference.cpp:388: batch size: 1 num batch: 500 throughput: xxxx imgs/s latency:xxxx ms + +>>> INFO: INT8 dummy data +imagenet_inference.cpp:282: Loading the model from ./model/resnet50_v1_int8-symbol.json +imagenet_inference.cpp:372: Running the forward pass on model to evaluate the performance.. +imagenet_inference.cpp:387: benchmark completed! 
+imagenet_inference.cpp:388: batch size: 1 num batch: 500 throughput: xxxx imgs/s latency:xxxx ms +``` +To run this example with TensorRT, you can try the following command, which runs a benchmark on Inception BN: +``` +./imagenet_inference --symbol_file "./model/Inception-BN-symbol.json" --params_file "./model/Inception-BN-0126.params" --batch_size 16 --num_inference_batches 500 --benchmark --enableTRT +``` +Sample output will look like this (the example was run on an AWS P3.2xl machine): +``` +imagenet_inference.cpp:302: Loading the model from ./model/Inception-BN-symbol.json +build_subgraph.cc:686: start to execute partition graph. +imagenet_inference.cpp:317: Loading the model parameters from ./model/Inception-BN-0126.params +imagenet_inference.cpp:424: Running the forward pass on model to evaluate the performance.. +imagenet_inference.cpp:439: benchmark completed! +imagenet_inference.cpp:440: batch size: 16 num batch: 500 throughput: 6284.78 imgs/s latency:0.159115 ms +``` + +## [sentiment_analysis_rnn.cpp]() +This example demonstrates how you can load a pre-trained RNN model and use it to predict the sentiment expressed in the given movie review with the MXNet C++ API. The example is capable of processing variable-length inputs. It performs the following tasks: +- Loads the pre-trained RNN model. +- Loads the dictionary file containing the word-to-index mapping. +- Splits the review into multiple lines separated by "." +- The example predicts the sentiment score for individual lines and outputs the average score. + +The example is capable of processing variable-length input by implementing the following technique: +- The example creates executors for pre-determined input lengths such as 5, 10, 15, 20, 25, etc., called **buckets**. +- Each bucket is identified by a **bucket-key** representing the input length required by the corresponding executor. 
+- For each line in the review, the example finds the number of words in the line and tries to find the closest bucket or executor. +- If the bucket key does not match the number of words in the line, the example pads or trims the input line to match the required length. + +The example uses a pre-trained RNN model trained on the IMDB dataset. The RNN model was built by following the [GluonNLP Sentiment Analysis Tutorial](). The tutorial uses 'standard_lstm_lm_200' available in Gluon Model Zoo and fine-tunes it for the IMDB dataset. +The model consists of: +- Embedding Layer +- 2 LSTM Layers with hidden dimension size of 200 +- Average pooling layer +- Sigmoid output layer +The model was trained for 10 epochs to achieve 85% test accuracy. +The visual representation of the model is [here](). + +The model files can be found here. +- [sentiment_analysis-symbol.json](< https://s3.amazonaws.com/mxnet-cpp/RNN_model/sentiment_analysis-symbol.json>) +- [sentiment_analysis-0010.params](< https://s3.amazonaws.com/mxnet-cpp/RNN_model/sentiment_analysis-0010.params>) +- [sentiment_token_to_idx.txt]() Each line of the dictionary file contains a word and a unique index for that word, separated by a space, with a total of 32787 words generated from the training dataset. +The example downloads the above files while running. + +The example's command line parameters are as shown below: + +``` +./sentiment_analysis_rnn --help +Usage: +sentiment_analysis_rnn +--input Input movie review. The review can be single line or multiline.e.g. "This movie is the best." OR "This movie is the best. The direction is awesome." +[--gpu] Specify this option if workflow needs to be run in gpu context +If the review is multiline, the example predicts sentiment score for each line and the final score is the average of scores obtained for each line. + +``` + +The following command line shows how to run the example with a movie review containing only one line. 
+ +``` +./sentiment_analysis_rnn --input "This movie has the great story" +``` + +The above command will output the sentiment score as follows: +``` +sentiment_analysis_rnn.cpp:346: Input Line : [This movie has the great story] Score : 0.999898 +sentiment_analysis_rnn.cpp:449: The sentiment score between 0 and 1, (1 being positive)=0.999898 +``` + +The following command line shows invoking the example with the multi-line review. + +``` +./sentiment_analysis_rnn --input "This movie is the best. The direction is awesome." +``` +The above command will output the sentiment score for each line in the review and average score as follows: +``` +Input Line : [This movie is the best] Score : 0.964498 +Input Line : [ The direction is awesome] Score : 0.968855 +The sentiment score between 0 and 1, (1 being positive)=0.966677 +``` + +Alternatively, you can run the [unit_test_sentiment_analysis_rnn.sh]() script. diff --git a/cpp-package/example/inference/imagenet_inference.cpp b/cpp-package/example/inference/imagenet_inference.cpp new file mode 100644 index 000000000000..845a227fe93d --- /dev/null +++ b/cpp-package/example/inference/imagenet_inference.cpp @@ -0,0 +1,662 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +/* + * This example demonstrates image classification workflow with pre-trained models using MXNet C++ API. + * The example performs following tasks. + * 1. Load the pre-trained model. + * 2. Load the parameters of pre-trained model. + * 3. Load the inference dataset and create a new ImageRecordIter. + * 4. Run the forward pass and obtain throughput & accuracy. + */ +#ifndef _WIN32 +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mxnet/c_api.h" +#include "mxnet/tuple.h" +#include "mxnet-cpp/MxNetCpp.h" +#include "mxnet-cpp/initializer.h" + +using namespace mxnet::cpp; + +double ms_now() { + double ret; +#ifdef _WIN32 + auto timePoint = std::chrono::high_resolution_clock::now().time_since_epoch(); + ret = std::chrono::duration(timePoint).count(); +#else + struct timeval time; + gettimeofday(&time, nullptr); + ret = 1e+3 * time.tv_sec + 1e-3 * time.tv_usec; +#endif + return ret; +} + + +// define the data type for NDArray, aliged with the definition in mshadow/base.h +enum TypeFlag { + kFloat32 = 0, + kFloat64 = 1, + kFloat16 = 2, + kUint8 = 3, + kInt32 = 4, + kInt8 = 5, + kInt64 = 6, +}; + +/* + * class Predictor + * + * This class encapsulates the functionality to load the model, prepare dataset and run the forward pass. 
+ */ + +class Predictor { + public: + Predictor() {} + Predictor(const std::string& model_json_file, + const std::string& model_params_file, + const Shape& input_shape, + bool use_gpu, + bool enable_tensorrt, + const std::string& dataset, + const int data_nthreads, + const std::string& data_layer_type, + const std::vector& rgb_mean, + const std::vector& rgb_std, + int shuffle_chunk_seed, + int seed, bool benchmark); + void BenchmarkScore(int num_inference_batches); + void Score(int num_skipped_batches, int num_inference_batches); + ~Predictor(); + + private: + bool CreateImageRecordIter(); + bool AdvanceDataIter(int skipped_batches); + void LoadModel(const std::string& model_json_file); + void LoadParameters(const std::string& model_parameters_file); + void SplitParamMap(const std::map ¶mMap, + std::map *argParamInTargetContext, + std::map *auxParamInTargetContext, + Context targetContext); + void ConvertParamMapToTargetContext(const std::map ¶mMap, + std::map *paramMapInTargetContext, + Context targetContext); + void InitParameters(); + + inline bool FileExists(const std::string &name) { + std::ifstream fhandle(name.c_str()); + return fhandle.good(); + } + int GetDataLayerType(); + + std::map args_map_; + std::map aux_map_; + Symbol net_; + Executor *executor_; + Shape input_shape_; + Context global_ctx_ = Context::cpu(); + + MXDataIter *val_iter_; + bool use_gpu_; + bool enable_tensorrt_; + std::string dataset_; + int data_nthreads_; + std::string data_layer_type_; + std::vector rgb_mean_; + std::vector rgb_std_; + int shuffle_chunk_seed_; + int seed_; + bool benchmark_; +}; + + +/* + * The constructor takes following parameters as input: + * 1. model_json_file: The model in json formatted file. + * 2. model_params_file: File containing model parameters + * 3. input_shape: Shape of input data to the model. 
Since this class will be running one inference at a time, + * the input shape is required to be in format Shape(1, number_of_channels, height, width) + * The input image will be resized to (height x width) size before running the inference. + * 4. use_gpu: determine if run inference on GPU + * 5. enable_tensorrt: determine if enable TensorRT + * 6. dataset: data file (.rec) to be used for inference + * 7. data_nthreads: number of threads for data loading + * 8. data_layer_type: data type for data layer + * 9. rgb_mean: mean value to be subtracted on R/G/B channel + * 10. rgb_std: standard deviation on R/G/B channel + * 11. shuffle_chunk_seed: shuffling chunk seed + * 12. seed: shuffling seed + * 13. benchmark: use dummy data for inference + * + * The constructor will: + * 1. Create ImageRecordIter based on the given dataset file. + * 2. Load the model and parameter files. + * 3. Infer and construct NDArrays according to the input argument and create an executor. + */ +Predictor::Predictor(const std::string& model_json_file, + const std::string& model_params_file, + const Shape& input_shape, + bool use_gpu, + bool enable_tensorrt, + const std::string& dataset, + const int data_nthreads, + const std::string& data_layer_type, + const std::vector& rgb_mean, + const std::vector& rgb_std, + int shuffle_chunk_seed, + int seed, bool benchmark) + : input_shape_(input_shape), + use_gpu_(use_gpu), + enable_tensorrt_(enable_tensorrt), + dataset_(dataset), + data_nthreads_(data_nthreads), + data_layer_type_(data_layer_type), + rgb_mean_(rgb_mean), + rgb_std_(rgb_std), + shuffle_chunk_seed_(shuffle_chunk_seed), + seed_(seed), + benchmark_(benchmark) { + if (use_gpu) { + global_ctx_ = Context::gpu(); + } + + // initilize data iterator + if (!benchmark_ && !CreateImageRecordIter()) { + LG << "Error: failed to create ImageRecordIter"; + throw std::runtime_error("ImageRecordIter cannot be created"); + } + + // Load the model + LoadModel(model_json_file); + // Initilize the 
parameters + // benchmark=true && model_params_file.empty(), randomly initialize parameters + // else, load parameters + if (benchmark_ && model_params_file.empty()) { + InitParameters(); + } else { + LoadParameters(model_params_file); + } + + int dtype = GetDataLayerType(); + if (dtype == -1) { + throw std::runtime_error("Unsupported data layer type..."); + } + args_map_["data"] = NDArray(input_shape_, global_ctx_, false, dtype); + Shape label_shape(input_shape_[0]); + args_map_["softmax_label"] = NDArray(label_shape, global_ctx_, false); + std::vector arg_arrays; + std::vector grad_arrays; + std::vector grad_reqs; + std::vector aux_arrays; + + // infer and create ndarrays according to the given input ndarrays. + net_.InferExecutorArrays(global_ctx_, &arg_arrays, &grad_arrays, &grad_reqs, + &aux_arrays, args_map_, std::map(), + std::map(), aux_map_); + for (auto& i : grad_reqs) i = OpReqType::kNullOp; + + // Create an executor after binding the model to input parameters. + executor_ = new Executor(net_, global_ctx_, arg_arrays, grad_arrays, grad_reqs, aux_arrays); +} + +/* + * The following function is used to get the data layer type for input data + */ +int Predictor::GetDataLayerType() { + int ret_type = -1; + if (data_layer_type_ == "float32") { + ret_type = kFloat32; + } else if (data_layer_type_ == "int8") { + ret_type = kInt8; + } else if (data_layer_type_ == "uint8") { + ret_type = kUint8; + } else { + LG << "Unsupported data layer type " << data_layer_type_ << "..." 
+ << "Please use one of {float32, int8, uint8}"; + } + return ret_type; +} + +/* + * create a new ImageRecordIter according to the given parameters + */ +bool Predictor::CreateImageRecordIter() { + val_iter_ = new MXDataIter("ImageRecordIter"); + if (!FileExists(dataset_)) { + LG << "Error: " << dataset_ << " must be provided"; + return false; + } + + std::vector shape_vec; + for (index_t i = 1; i < input_shape_.ndim(); i++) + shape_vec.push_back(input_shape_[i]); + mxnet::TShape data_shape(shape_vec.begin(), shape_vec.end()); + + // set image record parser parameters + val_iter_->SetParam("path_imgrec", dataset_); + val_iter_->SetParam("label_width", 1); + val_iter_->SetParam("data_shape", data_shape); + val_iter_->SetParam("preprocess_threads", data_nthreads_); + val_iter_->SetParam("shuffle_chunk_seed", shuffle_chunk_seed_); + + // set Batch parameters + val_iter_->SetParam("batch_size", input_shape_[0]); + + // image record parameters + val_iter_->SetParam("shuffle", true); + val_iter_->SetParam("seed", seed_); + + // set normalize parameters + val_iter_->SetParam("mean_r", rgb_mean_[0]); + val_iter_->SetParam("mean_g", rgb_mean_[1]); + val_iter_->SetParam("mean_b", rgb_mean_[2]); + val_iter_->SetParam("std_r", rgb_std_[0]); + val_iter_->SetParam("std_g", rgb_std_[1]); + val_iter_->SetParam("std_b", rgb_std_[2]); + + // set prefetcher parameters + if (use_gpu_) { + val_iter_->SetParam("ctx", "gpu"); + } else { + val_iter_->SetParam("ctx", "cpu"); + } + val_iter_->SetParam("dtype", data_layer_type_); + + val_iter_->CreateDataIter(); + return true; +} + +/* + * The following function loads the model from json file. 
+ */ +void Predictor::LoadModel(const std::string& model_json_file) { + if (!FileExists(model_json_file)) { + LG << "Model file " << model_json_file << " does not exist"; + throw std::runtime_error("Model file does not exist"); + } + LG << "Loading the model from " << model_json_file << std::endl; + net_ = Symbol::Load(model_json_file); + if (enable_tensorrt_) { + net_ = net_.GetBackendSymbol("TensorRT"); + } +} + +/* + * The following function loads the model parameters. + */ +void Predictor::LoadParameters(const std::string& model_parameters_file) { + if (!FileExists(model_parameters_file)) { + LG << "Parameter file " << model_parameters_file << " does not exist"; + throw std::runtime_error("Model parameters does not exist"); + } + LG << "Loading the model parameters from " << model_parameters_file << std::endl; + std::map parameters; + NDArray::Load(model_parameters_file, 0, ¶meters); + if (enable_tensorrt_) { + std::map intermediate_args_map; + std::map intermediate_aux_map; + SplitParamMap(parameters, &intermediate_args_map, &intermediate_aux_map, Context::cpu()); + contrib::InitTensorRTParams(net_, &intermediate_args_map, &intermediate_aux_map); + ConvertParamMapToTargetContext(intermediate_args_map, &args_map_, global_ctx_); + ConvertParamMapToTargetContext(intermediate_aux_map, &aux_map_, global_ctx_); + } else { + SplitParamMap(parameters, &args_map_, &aux_map_, global_ctx_); + } + /*WaitAll is need when we copy data between GPU and the main memory*/ + NDArray::WaitAll(); +} + +/* + * The following function split loaded param map into arg parm + * and aux param with target context + */ +void Predictor::SplitParamMap(const std::map ¶mMap, + std::map *argParamInTargetContext, + std::map *auxParamInTargetContext, + Context targetContext) { + for (const auto& pair : paramMap) { + std::string type = pair.first.substr(0, 4); + std::string name = pair.first.substr(4); + if (type == "arg:") { + (*argParamInTargetContext)[name] = pair.second.Copy(targetContext); + 
} else if (type == "aux:") { + (*auxParamInTargetContext)[name] = pair.second.Copy(targetContext); + } + } +} + +/* + * The following function copy the param map into the target context + */ +void Predictor::ConvertParamMapToTargetContext(const std::map ¶mMap, + std::map *paramMapInTargetContext, + Context targetContext) { + for (const auto& pair : paramMap) { + (*paramMapInTargetContext)[pair.first] = pair.second.Copy(targetContext); + } +} + +/* + * The following function randomly initializes the parameters when benchmark_ is true. + */ +void Predictor::InitParameters() { + std::vector data_shape; + for (index_t i = 0; i < input_shape_.ndim(); i++) { + data_shape.push_back(input_shape_[i]); + } + + std::map > arg_shapes; + std::vector > aux_shapes, in_shapes, out_shapes; + arg_shapes["data"] = data_shape; + net_.InferShape(arg_shapes, &in_shapes, &aux_shapes, &out_shapes); + + // initializer to call + Xavier xavier(Xavier::uniform, Xavier::avg, 2.0f); + + auto arg_name_list = net_.ListArguments(); + for (index_t i = 0; i < in_shapes.size(); i++) { + const auto &shape = in_shapes[i]; + const auto &arg_name = arg_name_list[i]; + int paramType = kFloat32; + if (Initializer::StringEndWith(arg_name, "weight_quantize") || + Initializer::StringEndWith(arg_name, "bias_quantize")) { + paramType = kInt8; + } + NDArray tmp_arr(shape, global_ctx_, false, paramType); + xavier(arg_name, &tmp_arr); + args_map_[arg_name] = tmp_arr.Copy(global_ctx_); + } + + auto aux_name_list = net_.ListAuxiliaryStates(); + for (index_t i = 0; i < aux_shapes.size(); i++) { + const auto &shape = aux_shapes[i]; + const auto &aux_name = aux_name_list[i]; + NDArray tmp_arr(shape, global_ctx_, false); + xavier(aux_name, &tmp_arr); + aux_map_[aux_name] = tmp_arr.Copy(global_ctx_); + } + /*WaitAll is need when we copy data between GPU and the main memory*/ + NDArray::WaitAll(); +} + +/* + * The following function runs the forward pass on the model + * and use dummy data for benchmark. 
+ */ +void Predictor::BenchmarkScore(int num_inference_batches) { + // Create dummy data + std::vector dummy_data(input_shape_.Size()); + std::default_random_engine generator; + std::uniform_real_distribution val(0.0f, 1.0f); + for (size_t i = 0; i < static_cast(input_shape_.Size()); ++i) { + dummy_data[i] = static_cast(val(generator)); + } + executor_->arg_dict()["data"].SyncCopyFromCPU( + dummy_data.data(), + input_shape_.Size()); + NDArray::WaitAll(); + + LG << "Running the forward pass on model to evaluate the performance.."; + + // warm up. + for (int i = 0; i < 5; i++) { + executor_->Forward(false); + NDArray::WaitAll(); + } + + // Run the forward pass. + double ms = ms_now(); + for (int i = 0; i < num_inference_batches; i++) { + executor_->Forward(false); + NDArray::WaitAll(); + } + ms = ms_now() - ms; + LG << " benchmark completed!"; + LG << " batch size: " << input_shape_[0] << " num batch: " << num_inference_batches + << " throughput: " << 1000.0 * input_shape_[0] * num_inference_batches / ms + << " imgs/s latency:" << ms / input_shape_[0] / num_inference_batches << " ms"; +} + +/* + * \param skipped_batches skip the first number of batches + * + */ +bool Predictor::AdvanceDataIter(int skipped_batches) { + assert(skipped_batches >= 0); + if (skipped_batches == 0) return true; + int skipped_count = 0; + while (val_iter_->Next()) { + if (++skipped_count >= skipped_batches) break; + } + if (skipped_count != skipped_batches) return false; + return true; +} + +/* + * The following function runs the forward pass on the model + * and use real data for testing accuracy and performance. 
+ */ +void Predictor::Score(int num_skipped_batches, int num_inference_batches) { + // Create metrics + Accuracy val_acc; + + val_iter_->Reset(); + val_acc.Reset(); + int nBatch = 0; + + if (!AdvanceDataIter(num_skipped_batches)) { + LG << "skipped batches should less than total batches!"; + return; + } + + double ms = ms_now(); + while (val_iter_->Next()) { + auto data_batch = val_iter_->GetDataBatch(); + data_batch.data.CopyTo(&args_map_["data"]); + data_batch.label.CopyTo(&args_map_["softmax_label"]); + NDArray::WaitAll(); + + // running on forward pass + executor_->Forward(false); + NDArray::WaitAll(); + val_acc.Update(data_batch.label, executor_->outputs[0]); + + if (++nBatch >= num_inference_batches) { + break; + } + } + ms = ms_now() - ms; + auto args_name = net_.ListArguments(); + LG << "INFO:" << "Dataset for inference: " << dataset_; + LG << "INFO:" << "label_name = " << args_name[args_name.size()-1]; + LG << "INFO:" << "rgb_mean: " << "(" << rgb_mean_[0] << ", " << rgb_mean_[1] + << ", " << rgb_mean_[2] << ")"; + LG << "INFO:" << "rgb_std: " << "(" << rgb_std_[0] << ", " << rgb_std_[1] + << ", " << rgb_std_[2] << ")"; + LG << "INFO:" << "Image shape: " << "(" << input_shape_[1] << ", " + << input_shape_[2] << ", " << input_shape_[3] << ")"; + LG << "INFO:" << "Finished inference with: " << nBatch * input_shape_[0] + << " images "; + LG << "INFO:" << "Batch size = " << input_shape_[0] << " for inference"; + LG << "INFO:" << "Accuracy: " << val_acc.Get(); + LG << "INFO:" << "Throughput: " << (1000.0 * nBatch * input_shape_[0] / ms) + << " images per second"; +} + +Predictor::~Predictor() { + if (executor_) { + delete executor_; + } + if (!benchmark_ && val_iter_) { + delete val_iter_; + } + MXNotifyShutdown(); +} + +/* + * Convert the input string of number into the vector. 
+ */ +template +std::vector createVectorFromString(const std::string& input_string) { + std::vector dst_vec; + char *p_next; + T elem; + bool bFloat = std::is_same::value; + if (!bFloat) { + elem = strtol(input_string.c_str(), &p_next, 10); + } else { + elem = strtof(input_string.c_str(), &p_next); + } + + dst_vec.push_back(elem); + while (*p_next) { + if (!bFloat) { + elem = strtol(p_next, &p_next, 10); + } else { + elem = strtof(p_next, &p_next); + } + dst_vec.push_back(elem); + } + return dst_vec; +} + +void printUsage() { + std::cout << "Usage:" << std::endl; + std::cout << "imagenet_inference --symbol_file " << std::endl + << "--params_file " << std::endl + << "--dataset " << std::endl + << "--data_nthreads " << std::endl + << "--input_shape ] " << std::endl + << "--rgb_mean " + << std::endl + << "--rgb_std " << std::endl + << "--batch_size " << std::endl + << "--num_skipped_batches " << std::endl + << "--num_inference_batches " << std::endl + << "--data_layer_type " << std::endl + << "--gpu " << std::endl + << "--enableTRT " << std::endl + << "--benchmark " + << std::endl; +} + +int main(int argc, char** argv) { + std::string model_file_json; + std::string model_file_params; + std::string dataset(""); + std::string input_rgb_mean("0 0 0"); + std::string input_rgb_std("1 1 1"); + bool use_gpu = false; + bool enable_tensorrt = false; + bool benchmark = false; + int batch_size = 64; + int num_skipped_batches = 0; + int num_inference_batches = 100; + std::string data_layer_type("float32"); + std::string input_shape("3 224 224"); + int seed = 48564309; + int shuffle_chunk_seed = 3982304; + int data_nthreads = 60; + + int index = 1; + while (index < argc) { + if (strcmp("--symbol_file", argv[index]) == 0) { + index++; + model_file_json = (index < argc ? argv[index]:""); + } else if (strcmp("--params_file", argv[index]) == 0) { + index++; + model_file_params = (index < argc ? 
argv[index]:""); + } else if (strcmp("--dataset", argv[index]) == 0) { + index++; + dataset = (index < argc ? argv[index]:dataset); + } else if (strcmp("--data_nthreads", argv[index]) == 0) { + index++; + data_nthreads = strtol(argv[index], nullptr, 10); + } else if (strcmp("--input_shape", argv[index]) == 0) { + index++; + input_shape = (index < argc ? argv[index]:input_shape); + } else if (strcmp("--rgb_mean", argv[index]) == 0) { + index++; + input_rgb_mean = (index < argc ? argv[index]:input_rgb_mean); + } else if (strcmp("--rgb_std", argv[index]) == 0) { + index++; + input_rgb_std = (index < argc ? argv[index]:input_rgb_std); + } else if (strcmp("--batch_size", argv[index]) == 0) { + index++; + batch_size = strtol(argv[index], nullptr, 10); + } else if (strcmp("--num_skipped_batches", argv[index]) == 0) { + index++; + num_skipped_batches = strtol(argv[index], nullptr, 10); + } else if (strcmp("--num_inference_batches", argv[index]) == 0) { + index++; + num_inference_batches = strtol(argv[index], nullptr, 10); + } else if (strcmp("--data_layer_type", argv[index]) == 0) { + index++; + data_layer_type = (index < argc ? 
argv[index]:data_layer_type); + } else if (strcmp("--gpu", argv[index]) == 0) { + use_gpu = true; + } else if (strcmp("--enableTRT", argv[index]) == 0) { + use_gpu = true; + enable_tensorrt = true; + } else if (strcmp("--benchmark", argv[index]) == 0) { + benchmark = true; + } else if (strcmp("--help", argv[index]) == 0) { + printUsage(); + return 0; + } + index++; + } + + if (model_file_json.empty() + || (!benchmark && model_file_params.empty()) + || (enable_tensorrt && model_file_params.empty())) { + LG << "ERROR: Model details such as symbol, param files are not specified"; + printUsage(); + return 1; + } + std::vector input_dimensions = createVectorFromString(input_shape); + input_dimensions.insert(input_dimensions.begin(), batch_size); + Shape input_data_shape(input_dimensions); + + std::vector rgb_mean = createVectorFromString(input_rgb_mean); + std::vector rgb_std = createVectorFromString(input_rgb_std); + + // Initialize the predictor object + Predictor predict(model_file_json, model_file_params, input_data_shape, use_gpu, enable_tensorrt, + dataset, data_nthreads, data_layer_type, rgb_mean, rgb_std, shuffle_chunk_seed, + seed, benchmark); + + if (benchmark) { + predict.BenchmarkScore(num_inference_batches); + } else { + predict.Score(num_skipped_batches, num_inference_batches); + } + return 0; +} diff --git a/cpp-package/example/inference/multi_threaded_inference/get_model.py b/cpp-package/example/inference/multi_threaded_inference/get_model.py new file mode 100644 index 000000000000..75a5d039c61d --- /dev/null +++ b/cpp-package/example/inference/multi_threaded_inference/get_model.py @@ -0,0 +1,174 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import logging
+import argparse
+import requests
+import errno
+import os
+
+models = ["imagenet1k-inception-bn", "imagenet1k-resnet-50",
+          "imagenet1k-resnet-152", "imagenet1k-resnet-18"]
+
+def download(url, fname=None, dirname=None, overwrite=False, retries=5):
+    """Download a given URL
+
+    Parameters
+    ----------
+
+    url : str
+        URL to download
+    fname : str, optional
+        filename of the downloaded file. If None, then will guess a filename
+        from url.
+    dirname : str, optional
+        output directory name. If None, then guess from fname or use the current
+        directory
+    overwrite : bool, optional
+        Default is false, which means skipping download if the local file
+        exists. If true, then download the url to overwrite the local file if
+        exists.
+    retries : integer, default 5
+        The number of times to attempt the download in case of failure or non 200 return codes
+
+    Returns
+    -------
+    str
+        The filename of the downloaded file
+    """
+
+    assert retries >= 0, "Number of retries should be at least 0"
+
+    if fname is None:
+        fname = url.split('/')[-1]
+
+    if dirname is None:
+        dirname = os.path.dirname(fname)
+    else:
+        fname = os.path.join(dirname, fname)
+    if dirname != "":
+        if not os.path.exists(dirname):
+            try:
+                logging.info('create directory %s', dirname)
+                os.makedirs(dirname)
+            except OSError as exc:
+                # Another process may have created the directory in the meantime;
+                # only a different failure is reported.
+                if exc.errno != errno.EEXIST:
+                    raise OSError('failed to create ' + dirname)
+
+    if not overwrite and os.path.exists(fname):
+        logging.info("%s exists, skipping download", fname)
+        return fname
+
+    while retries+1 > 0:
+        # Disable pylint's too-broad-exception warning: any failure triggers a retry
+        # pylint: disable=W0703
+        try:
+            r = requests.get(url, stream=True)
+            assert r.status_code == 200, "failed to open %s" % url
+            with open(fname, 'wb') as f:
+                for chunk in r.iter_content(chunk_size=1024):
+                    if chunk: # filter out keep-alive new chunks
+                        f.write(chunk)
+                break
+        except Exception as e:
+            retries -= 1
+            if retries <= 0:
+                raise e
+
+            print("download failed, retrying, {} attempt{} left"
+                  .format(retries, 's' if retries > 1 else ''))
+    logging.info("downloaded %s into %s successfully", url, fname)
+    return fname
+
+def download_model(model_name, dst_dir='./', meta_info=None):
+    """Download a model from data.mxnet.io
+
+    Parameters
+    ----------
+    model_name : str
+        Model name to download
+    dst_dir : str
+        Destination Directory to download the model
+    meta_info : dict of dict
+        Mapping from model_name to dict of the following structure:
+        {'symbol': url, 'params': url}
+
+    Returns
+    -------
+    Two element tuple containing model_name and epoch for the params saved.
+    (None, 0) is returned when model_name is not a key of meta_info.
+    """
+    _base_model_url = 'http://data.mxnet.io/models/'
+    _default_model_info = {
+        'imagenet1k-inception-bn': {'symbol':_base_model_url+'imagenet/inception-bn/Inception-BN-symbol.json',
+                                    'params':_base_model_url+'imagenet/inception-bn/Inception-BN-0126.params'},
+        'imagenet1k-resnet-18': {'symbol':_base_model_url+'imagenet/resnet/18-layers/resnet-18-symbol.json',
+                                 'params':_base_model_url+'imagenet/resnet/18-layers/resnet-18-0000.params'},
+        'imagenet1k-resnet-34': {'symbol':_base_model_url+'imagenet/resnet/34-layers/resnet-34-symbol.json',
+                                 'params':_base_model_url+'imagenet/resnet/34-layers/resnet-34-0000.params'},
+        'imagenet1k-resnet-50': {'symbol':_base_model_url+'imagenet/resnet/50-layers/resnet-50-symbol.json',
+                                 'params':_base_model_url+'imagenet/resnet/50-layers/resnet-50-0000.params'},
+        'imagenet1k-resnet-101': {'symbol':_base_model_url+'imagenet/resnet/101-layers/resnet-101-symbol.json',
+                                  'params':_base_model_url+'imagenet/resnet/101-layers/resnet-101-0000.params'},
+        'imagenet1k-resnet-152': {'symbol':_base_model_url+'imagenet/resnet/152-layers/resnet-152-symbol.json',
+                                  'params':_base_model_url+'imagenet/resnet/152-layers/resnet-152-0000.params'},
+        'imagenet1k-resnext-50': {'symbol':_base_model_url+'imagenet/resnext/50-layers/resnext-50-symbol.json',
+                                  'params':_base_model_url+'imagenet/resnext/50-layers/resnext-50-0000.params'},
+        'imagenet1k-resnext-101': {'symbol':_base_model_url+'imagenet/resnext/101-layers/resnext-101-symbol.json',
+                                   'params':_base_model_url+'imagenet/resnext/101-layers/resnext-101-0000.params'},
+        'imagenet1k-resnext-101-64x4d':
+            {'symbol':_base_model_url+'imagenet/resnext/101-layers/resnext-101-64x4d-symbol.json',
+             'params':_base_model_url+'imagenet/resnext/101-layers/resnext-101-64x4d-0000.params'},
+        'imagenet11k-resnet-152':
+            {'symbol':_base_model_url+'imagenet-11k/resnet-152/resnet-152-symbol.json',
+             'params':_base_model_url+'imagenet-11k/resnet-152/resnet-152-0000.params'},
+        'imagenet11k-place365ch-resnet-152':
+            {'symbol':_base_model_url+'imagenet-11k-place365-ch/resnet-152-symbol.json',
+             'params':_base_model_url+'imagenet-11k-place365-ch/resnet-152-0000.params'},
+        'imagenet11k-place365ch-resnet-50':
+            {'symbol':_base_model_url+'imagenet-11k-place365-ch/resnet-50-symbol.json',
+             'params':_base_model_url+'imagenet-11k-place365-ch/resnet-50-0000.params'},
+    }
+
+
+    if meta_info is None:
+        meta_info = _default_model_info
+    meta_info = dict(meta_info)
+    if model_name not in meta_info:
+        return (None, 0)
+    if not os.path.isdir(dst_dir):
+        os.mkdir(dst_dir)
+    meta = dict(meta_info[model_name])
+    assert 'symbol' in meta, "missing symbol url"
+    model_name = os.path.join(dst_dir, model_name)
+    # Both files are stored under the fixed "<model_name>-symbol.json" /
+    # "<model_name>-0000.params" names regardless of the remote file name.
+    download(meta['symbol'], model_name+'-symbol.json')
+    assert 'params' in meta, "mssing parameter file url"
+    download(meta['params'], model_name+'-0000.params')
+    # No fname/dirname is passed, so synset.txt lands in the current directory.
+    download(_base_model_url + 'imagenet/synset.txt')
+    return (model_name, 0)
+
+def main():
+    """Parse --model from the command line and download that model."""
+    logging.basicConfig()
+    # download() reports progress with logging.info(), i.e. through the root
+    # logger, so the INFO threshold must be set on the root logger; a separately
+    # named logger would leave those messages filtered out.
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    parser = argparse.ArgumentParser(description='Download model hybridize and save as symbolic model for multithreaded inference')
+    parser.add_argument("--model", type=str, choices=models, required=True)
+    args = parser.parse_args()
+
+    download_model(args.model)
+
+if __name__ == "__main__":
+    main()
diff --git a/cpp-package/example/inference/multi_threaded_inference/multi_threaded_inference.cc b/cpp-package/example/inference/multi_threaded_inference/multi_threaded_inference.cc
new file mode 100644
index 000000000000..b9d94b75a296
--- /dev/null
+++ b/cpp-package/example/inference/multi_threaded_inference/multi_threaded_inference.cc
@@ -0,0 +1,356 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2017 by Contributors
+ * \file multi_threaded_inference.cc
+ * \brief Multi Threaded inference example with CachedOp
+*/
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "mxnet-cpp/MxNetCpp.h"
+#include
+
+const float DEFAULT_MEAN = 117.0;
+
+
+// Code to load image, PrintOutput results, helper functions for the same obtained from:
+// https://github.com/apache/incubator-mxnet/blob/master/example/image-classification/predict-cpp/
+
+// Returns a copy of `input` with leading and trailing whitespace
+// (as classified by std::isspace) removed.
+static std::string trim(const std::string &input) {
+  auto not_space = [](int ch) { return !std::isspace(ch); };
+  auto output = input;
+  output.erase(output.begin(),
+               std::find_if(output.begin(), output.end(), not_space));
+  output.erase(std::find_if(output.rbegin(), output.rend(), not_space).base(),
+               output.end());
+  return output;
+}
+
+// Reads the label file: for every entry the first whitespace-delimited token
+// is read and discarded, and the remainder of that line is stored as the label.
+// NOTE: the failure path relies on assert(), which is compiled out when
+// NDEBUG is defined.
+std::vector LoadSynset(const std::string& synset_file) {
+  std::ifstream fi(synset_file.c_str());
+
+  if (!fi.is_open()) {
+    std::cerr << "Error opening synset file " << synset_file << std::endl;
+    assert(false);
+  }
+
+  std::vector output;
+
+  std::string synset, lemma;
+  while (fi >> synset) {
+    getline(fi, lemma);
+    output.push_back(lemma);
+  }
+
+  fi.close();
+
+  return output;
+}
+
+// Prints the label, index and score of the largest positive entry of
+// data[0..size). A size mismatch with `synset` is reported but not fatal.
+void PrintOutputResult(const float* data, size_t size, const std::vector& synset) {
+  if (size != synset.size()) {
+    std::cerr << "Result data and synset size do not match!" << std::endl;
+  }
+
+  float best_accuracy = 0.0;
+  std::size_t best_idx = 0;
+
+  for (std::size_t i = 0; i < size; ++i) {
+    if (data[i] > best_accuracy) {
+      best_accuracy = data[i];
+      best_idx = i;
+    }
+  }
+
+  // best_idx ranges over the data, not over the labels: without this check a
+  // shorter (or empty) synset would be indexed out of bounds below.
+  if (best_idx >= synset.size()) {
+    std::cerr << "No synset entry for id=" << best_idx << std::endl;
+    return;
+  }
+
+  std::cout << "Best Result: " << trim(synset[best_idx]) << " (id=" << best_idx << ", " <<
+            "accuracy=" << std::setprecision(8) << best_accuracy << ")" << std::endl;
+}
+
+
+// Read Image data into a float array
+// The image is loaded as 3-channel BGR, resized to `resize_size`, and written
+// to `image_data` as three consecutive planes (R first, then G, then B) with
+// DEFAULT_MEAN subtracted from every value. `image_data` must have room for
+// rows * cols * channels floats.
+void GetImageFile(const std::string &image_file, float *image_data,
+                  int channels, cv::Size resize_size) {
+  // Read all kinds of file into a BGR color 3 channels image
+  cv::Mat im_ori = cv::imread(image_file, cv::IMREAD_COLOR);
+
+  if (im_ori.empty()) {
+    std::cerr << "Can't open the image. Plase check " << image_file << ". \n";
+    assert(false);
+  }
+
+  cv::Mat im;
+  resize(im_ori, im, resize_size);
+
+  int size = im.rows * im.cols * channels;
+
+  float* ptr_image_r = image_data;
+  float* ptr_image_g = image_data + size / 3;
+  float* ptr_image_b = image_data + size / 3 * 2;
+
+  float mean_b, mean_g, mean_r;
+  mean_b = mean_g = mean_r = DEFAULT_MEAN;
+
+  for (int i = 0; i < im.rows; ++i) {
+    auto data = im.ptr(i);
+    for (int j = 0; j < im.cols; j++) {
+      if (channels > 1) {
+        *ptr_image_b++ = static_cast(*data++) - mean_b;
+        *ptr_image_g++ = static_cast(*data++) - mean_g;
+      }
+      // The red value must be consumed once per pixel, inside the column
+      // loop; doing it once per row leaves the red plane almost empty and
+      // shifts every following blue/green read by a channel.
+      *ptr_image_r++ = static_cast(*data++) - mean_r;
+    }
+  }
+}
+
+// Appends `num_threads` arrays of the given shape/context to `data_arr`,
+// optionally filling array i with uniform random values in [100*i, 100*i+100).
+void prepare_input_data(const mxnet::cpp::Shape& shape, const mxnet::cpp::Context& ctx,
+                        int num_threads,
+                        std::vector* data_arr,
+                        bool random_uniform = false) {
+  for (size_t i = 0; i < num_threads; ++i) {
+    data_arr->emplace_back(shape, ctx, false, 0);
+    int begin = i * 100;
+    int end = begin + 100;
+    if (random_uniform) {
+      mxnet::cpp::Operator("_random_uniform")(begin, end)
+          .Invoke((*data_arr)[i]);
+    }
+    mxnet::cpp::NDArray::WaitAll();
+  }
+}
+
+// Run inference on a model
+// Loads "<model_name>-symbol.json" and "<model_name>-0000.params", creates a
+// single cached op, invokes it from `num_threads` threads (thread i consumes
+// input_arrs[i]) and prints the best label for every output.
+void run_inference(const std::string& model_name,
+                   const std::vector& input_arrs,
+                   std::vector *output_mx_arr,
+                   int num_inf_per_thread = 1, bool random_sleep = false,
+                   int num_threads = 1, bool static_alloc = false,
+                   bool static_shape = false,
+                   bool is_gpu = false) {
+  LOG(INFO) << "Running inference for " + model_name +
+               " num_threads: " + std::to_string(num_threads) +
+               " num_inf_per_thread: " + std::to_string(num_inf_per_thread) +
+               " random_sleep: " + std::to_string(random_sleep) +
+               " static_alloc: " + std::to_string(static_alloc) +
+               " static_shape: " + std::to_string(static_shape);
+  std::string json_file = model_name + "-symbol.json";
+  std::string param_file = model_name + "-0000.params";
+  auto out = mxnet::cpp::Symbol::Load(json_file);
+  std::string static_alloc_str = static_alloc ? "true" : "false";
+  std::string static_shape_str = static_shape ? "true" : "false";
+
+  // Prepare context
+# if MXNET_USE_CUDA == 1
+  mxnet::Context backend_ctx;
+  mxnet::cpp::Context ctx = mxnet::cpp::Context::cpu(0);
+  if (is_gpu) {
+    backend_ctx = mxnet::Context::GPU(0);
+    ctx = mxnet::cpp::Context::gpu(0);
+  } else {
+    backend_ctx = mxnet::Context::CPU(0);
+    ctx = mxnet::cpp::Context::cpu(0);
+  }
+# else
+  mxnet::Context backend_ctx = mxnet::Context::CPU(0);
+  mxnet::cpp::Context ctx = mxnet::cpp::Context::cpu(0);
+#endif
+
+  // Prepare input data and parameters
+  std::vector data_arr(num_threads);
+  std::vector softmax_arr;
+  std::vector params;
+  mxnet::cpp::Shape data_shape = mxnet::cpp::Shape(1, 3, 224, 224);
+  mxnet::cpp::Shape softmax_shape = mxnet::cpp::Shape(1);
+  int num_inputs = out.ListInputs().size();
+
+  for (size_t i = 0; i < data_arr.size(); ++i) {
+    data_arr[i] = input_arrs[i].Copy(ctx);
+  }
+  prepare_input_data(softmax_shape, ctx, num_threads, &softmax_arr);
+  std::map parameters;
+  mxnet::cpp::NDArray::Load(param_file, 0, &parameters);
+
+  // Collect the parameter arrays in symbol-input order; inputs that have no
+  // "arg:"/"aux:" entry in the params file are skipped.
+  for (const std::string& name : out.ListInputs()) {
+    if (name == "arg:data") {
+      continue;
+    }
+    if (parameters.find("arg:" + name) != parameters.end()) {
+      params.push_back(parameters["arg:" + name].Copy(ctx));
+    } else if (parameters.find("aux:" + name) != parameters.end()) {
+      params.push_back(parameters["aux:" + name].Copy(ctx));
+    }
+  }
+
+  CachedOpHandle hdl = CachedOpHandle();
+
+  std::vector flag_keys{"data_indices", "param_indices",
+                                     "static_alloc", "static_shape"};
+  std::string param_indices = "[";
+  for (size_t i = 1; i < num_inputs; ++i) {
+    param_indices += std::to_string(i);
+    param_indices += std::string(", ");
+  }
+  param_indices += "]";
+  std::vector flag_vals{"[0]", param_indices, static_alloc_str,
+                                     static_shape_str};
+  std::vector flag_key_cstrs, flag_val_cstrs;
+  flag_key_cstrs.reserve(flag_keys.size());
+  for (size_t i = 0; i < flag_keys.size(); ++i) {
+    flag_key_cstrs.emplace_back(flag_keys[i].c_str());
+  }
+  for (size_t i = 0; i < flag_vals.size(); ++i) {
+    flag_val_cstrs.emplace_back(flag_vals[i].c_str());
+  }
+
+  int ret1 = MXCreateCachedOp(out.GetHandle(), flag_keys.size(),
+                              flag_key_cstrs.data(), flag_val_cstrs.data(),
+                              &hdl, true);
+  if (ret1 < 0) {
+    LOG(FATAL) << MXGetLastError();
+  }
+
+  // Prepare data structures and lambda to run in different threads
+  std::vector cached_op_handles(num_threads);
+
+  // Per-thread input list: [data, params..., softmax label].
+  std::vector> arr_handles(num_threads);
+  for (size_t i = 0; i < num_threads; ++i) {
+    arr_handles[i].reserve(num_inputs);
+    arr_handles[i].emplace_back(data_arr[i].GetHandle());
+    for (size_t j = 1; j < num_inputs - 1; ++j) {
+      arr_handles[i].emplace_back(params[j - 1].GetHandle());
+    }
+    arr_handles[i].emplace_back(softmax_arr[i].GetHandle());
+  }
+
+  auto func = [&](int num) {
+    unsigned next = num;
+    if (random_sleep) {
+      static thread_local std::mt19937 generator;
+      std::uniform_int_distribution distribution(0, 5);
+      int sleep_time = distribution(generator);
+      std::this_thread::sleep_for(std::chrono::seconds(sleep_time));
+    }
+    int num_output = 0;
+    const int *stypes;
+    int ret = MXInvokeCachedOp(hdl, arr_handles[num].size(), arr_handles[num].data(),
+                               ctx.GetDeviceType(), 0, &num_output,
+                               &(cached_op_handles[num]), &stypes);
+    if (ret < 0) {
+      LOG(FATAL) << MXGetLastError();
+    }
+    (*output_mx_arr)[num] = static_cast(*cached_op_handles[num]);
+  };
+
+  // Spawn multiple threads, join and wait for threads to complete
+  std::vector worker_threads(num_threads);
+  int count = 0;
+  for (auto &&i : worker_threads) {
+    i = std::thread(func, count);
+    count++;
+  }
+
+  for (auto &&i : worker_threads) {
+    i.join();
+  }
+
+  mxnet::cpp::NDArray::WaitAll();
+
+  std::string synset_file = "synset.txt";
+  auto synset = LoadSynset(synset_file);
+  // NOTE(review): `tmp` is local to this function, so the pointers written
+  // into *output_mx_arr below dangle once run_inference returns; callers must
+  // not dereference them afterwards.
+  std::vector tmp(num_threads);
+  for (size_t i = 0; i < num_threads; i++) {
+    tmp[i] = (*output_mx_arr)[i]->Copy(mxnet::Context::CPU(0));
+    tmp[i].WaitToRead();
+    (*output_mx_arr)[i] = &tmp[i];
+  }
+  for (size_t i = 0; i < num_threads; ++i) {
+    PrintOutputResult(static_cast((*output_mx_arr)[i]->data().dptr_),
+                      (*output_mx_arr)[i]->shape().Size(), synset);
+  }
+  int ret2 = MXFreeCachedOp(hdl);
+  if (ret2 < 0) {
+    LOG(FATAL) << MXGetLastError();
+  }
+  mxnet::cpp::NDArray::WaitAll();
+}
+
+// Usage: multi_threaded_inference [model_name] [is_gpu] [file_names...]
+// One inference thread is started per image file given on the command line.
+int main(int argc, char *argv[]) {
+  if (argc < 4) {
+    std::cout << "Please provide a model name, is_gpu, test_image" << std::endl
+              << "Usage: ./multi_threaded_inference [model_name] [is_gpu] [file_names]"
+              << std::endl
+              << "Example: ./.multi_threaded_inference imagenet1k-inception-bn 0 apple.jpg"
+              << std::endl
+              << "NOTE: Thread number ordering will be based on the ordering of file inputs"
+              << std::endl
+              << "NOTE: Epoch is assumed to be 0" << std::endl;
+    return EXIT_FAILURE;
+  }
+  std::string model_name = std::string(argv[1]);
+  bool is_gpu = std::atoi(argv[2]);
+  CHECK(argc >= 4) << "Number of files provided should be atleast 1";
+  int num_threads = argc - 3;
+  std::vector test_files;
+  for (size_t i = 0; i < argc - 3; ++i) {
+    test_files.emplace_back(argv[3 + i]);
+  }
+  int epoch = 0;
+  bool static_alloc = true;
+  bool static_shape = true;
+
+
+  // Image size and channels
+  size_t width = 224;
+  size_t height = 224;
+  size_t channels = 3;
+
+  size_t image_size = width * height * channels;
+
+  // Read Image Data
+  // load into an input arr
+  std::vector> files(num_threads);
+  std::vector input_arrs;
+  mxnet::cpp::Shape input_shape = mxnet::cpp::Shape(1, 3, 224, 224);
+  for (size_t i = 0; i < files.size(); i++) {
+    files[i].resize(image_size);
+    GetImageFile(test_files[i], files[i].data(), channels,
+                 cv::Size(width, height));
+    input_arrs.emplace_back(mxnet::cpp::NDArray(files[i].data(),
+                            input_shape, mxnet::cpp::Context::cpu(0)));
+  }
+
+  // load symbol
+  std::string static_alloc_str = static_alloc ? "true" : "false";
+  std::string static_shape_str = static_shape ? "true" : "false";
+  std::vector output_mx_arr(num_threads);
+  run_inference(model_name, input_arrs, &output_mx_arr, 1, false, num_threads,
+                static_alloc, static_shape, is_gpu);
+  mxnet::cpp::NDArray::WaitAll();
+
+  return 0;
+}
diff --git a/cpp-package/example/inference/multi_threaded_inference/unit_test_multi_threaded_inference.sh b/cpp-package/example/inference/multi_threaded_inference/unit_test_multi_threaded_inference.sh
new file mode 100755
index 000000000000..7bd97c19b604
--- /dev/null
+++ b/cpp-package/example/inference/multi_threaded_inference/unit_test_multi_threaded_inference.sh
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.
See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Tutorial for this example:
+# http://mxnet.apache.org/versions/master/api/cpp/docs/tutorials/multi_threaded_inference.html
+
+# Download the two test images that are passed to the binary below.
+wget https://github.com/tensorflow/tensorflow/raw/master/tensorflow/examples/label_image/data/grace_hopper.jpg
+wget http://optipng.sourceforge.net/pngtech/img/lena.png
+
+# Download the model files with get_model.py (looked up in the current directory).
+python3 get_model.py --model imagenet1k-inception-bn
+
+# Run the test: model name, is_gpu flag (1), then one image file per inference thread.
+./multi_threaded_inference imagenet1k-inception-bn 1 grace_hopper.jpg lena.png
diff --git a/cpp-package/example/inference/sentiment_analysis_rnn.cpp b/cpp-package/example/inference/sentiment_analysis_rnn.cpp
new file mode 100755
index 000000000000..53b618ff116c
--- /dev/null
+++ b/cpp-package/example/inference/sentiment_analysis_rnn.cpp
@@ -0,0 +1,488 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * This example demonstrates the sentiment prediction workflow with a pre-trained RNN model using the MXNet C++ API.
+ * The example performs the following tasks.
+ * 1. Load the pre-trained RNN model,
+ * 2. Load the dictionary file that contains word to index mapping.
+ * 3. Create executors for pre-determined input lengths.
+ * 4.
Convert each line in the input to the vector of indices.
+ * 5. Predictor finds the right executor for each line.
+ * 6. Run the forward pass for each line and predict the sentiment scores.
+ * The example uses a pre-trained RNN model that is trained with the IMDB dataset.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "mxnet-cpp/MxNetCpp.h"
+
+using namespace mxnet::cpp;
+
+// Input lengths (in words) for which an executor is created by default.
+static const int DEFAULT_BUCKET_KEYS[] = {30, 25, 20, 15, 10, 5};
+// Base URL the model, parameter and dictionary files are downloaded from.
+static const char DEFAULT_S3_URL[] = "https://s3.amazonaws.com/mxnet-cpp/RNN_model/";
+
+
+/*
+ * class Predictor
+ *
+ * This class encapsulates the functionality to load the model, convert the input review text
+ * into word indices and run the forward pass.
+ */
+
+class Predictor {
+ public:
+  Predictor() {}
+  Predictor(const std::string& model_json,
+            const std::string& model_params,
+            const std::string& input_dictionary,
+            const std::vector& bucket_keys,
+            bool use_gpu = false);
+  // Returns the sentiment score of the whole review.
+  float PredictSentiment(const std::string &input_review);
+  ~Predictor();
+
+ private:
+  void LoadModel(const std::string& model_json_file);
+  void LoadParameters(const std::string& model_parameters_file);
+  void LoadDictionary(const std::string &input_dictionary);
+  // True when stat() succeeds on `name`.
+  inline bool FileExists(const std::string& name) {
+    struct stat buffer;
+    return (stat(name.c_str(), &buffer) == 0);
+  }
+  float PredictSentimentForOneLine(const std::string &input_line);
+  int ConvertToIndexVector(const std::string& input,
+                           std::vector *input_vector);
+  int GetIndexForOutputSymbolName(const std::string& output_symbol_name);
+  float GetIndexForWord(const std::string& word);
+  int GetClosestBucketKey(int num_words);
+
+  // Argument and auxiliary arrays keyed by name, filled by LoadParameters.
+  std::map args_map;
+  std::map aux_map;
+  // Dictionary filled by LoadDictionary: word to index.
+  std::map wordToIndex;
+  Symbol net;
+  // One executor per bucket key (input length).
+  std::map executor_buckets;
+  Context global_ctx = Context::cpu();
+  // Largest bucket key passed to the constructor.
+  int highest_bucket_key;
+};
+
+
+/*
+ * The constructor takes the following parameters as input:
+ * 1. model_json: The RNN model in json formatted file.
+ * 2.
model_params: File containing model parameters
+ * 3. input_dictionary: File containing the word and associated index.
+ * 4. bucket_keys: A non-empty vector of bucket keys for creating executors.
+ *
+ * The constructor:
+ * 1. Loads the model and parameter files.
+ * 2. Loads the dictionary file to create the word to index map.
+ * 3. For each bucket key in the input vector of bucket keys, it creates an executor.
+ * The executors share the memory. The bucket key determines the length of input data
+ * required for that executor.
+ * 4. Creates a map of bucket key to corresponding executor.
+ * 5. The model is loaded only once. The executors share the memory for the parameters.
+ *
+ * Throws std::runtime_error when bucket_keys is empty or an input file does not exist.
+ */
+Predictor::Predictor(const std::string& model_json,
+                     const std::string& model_params,
+                     const std::string& input_dictionary,
+                     const std::vector& bucket_keys,
+                     bool use_gpu) {
+  // std::max_element below returns end() for an empty range, and dereferencing
+  // that is undefined behaviour, so reject an empty key list up front.
+  if (bucket_keys.empty()) {
+    throw std::runtime_error("At least one bucket key is required");
+  }
+
+  if (use_gpu) {
+    global_ctx = Context::gpu();
+  }
+
+  /*
+   * Load the dictionary file that contains the word and its index.
+   * The function creates the word to index map. The map is used to create the index
+   * vector for the input sentence.
+   */
+  LoadDictionary(input_dictionary);
+
+  // Load the model
+  LoadModel(model_json);
+
+  // Load the model parameters.
+  LoadParameters(model_params);
+
+  /*
+   * Create the executors for each bucket key. The bucket key represents the shape of input data.
+   * The executors will share the memory by using following technique:
+   * 1. Infer the executor arrays and bind the first executor with the first bucket key.
+   * 2. Then for creating the next bucket key, adjust the shape of input argument to match that key.
+   * 3. Create the executor for the next bucket key by passing the inferred executor arrays and
+   * pointer to the executor created for the first key.
+   */
+  std::vector arg_arrays;
+  std::vector grad_arrays;
+  std::vector grad_reqs;
+  std::vector aux_arrays;
+
+  /*
+   * Create master executor with highest bucket key for optimizing the shared memory between the
+   * executors for the remaining bucket keys.
+   */
+  highest_bucket_key = *(std::max_element(bucket_keys.begin(), bucket_keys.end()));
+  args_map["data0"] = NDArray(Shape(highest_bucket_key, 1), global_ctx, false);
+  args_map["data1"] = NDArray(Shape(1), global_ctx, false);
+
+  net.InferExecutorArrays(global_ctx, &arg_arrays, &grad_arrays, &grad_reqs,
+                          &aux_arrays, args_map, std::map(),
+                          std::map(), aux_map);
+  Executor *master_executor = net.Bind(global_ctx, arg_arrays, grad_arrays, grad_reqs, aux_arrays,
+                                       std::map(), nullptr);
+  executor_buckets[highest_bucket_key] = master_executor;
+
+  // Bind one more executor per remaining (not yet seen) bucket key; only the
+  // first argument array is replaced to match that key's input length.
+  for (int bucket : bucket_keys) {
+    if (executor_buckets.find(bucket) == executor_buckets.end()) {
+      arg_arrays[0] = NDArray(Shape(bucket, 1), global_ctx, false);
+      Executor *executor = net.Bind(global_ctx, arg_arrays, grad_arrays, grad_reqs, aux_arrays,
+                                    std::map(), master_executor);
+      executor_buckets[bucket] = executor;
+    }
+  }
+}
+
+
+/*
+ * The following function loads the model from json file.
+ * Throws std::runtime_error when the file does not exist.
+ */
+void Predictor::LoadModel(const std::string& model_json_file) {
+  if (!FileExists(model_json_file)) {
+    LG << "Model file " << model_json_file << " does not exist";
+    throw std::runtime_error("Model file does not exist");
+  }
+  LG << "Loading the model from " << model_json_file << std::endl;
+  net = Symbol::Load(model_json_file);
+}
+
+
+/*
+ * The following function loads the model parameters.
+ */
+void Predictor::LoadParameters(const std::string& model_parameters_file) {
+  if (!FileExists(model_parameters_file)) {
+    LG << "Parameter file " << model_parameters_file << " does not exist";
+    throw std::runtime_error("Model parameters does not exist");
+  }
+  LG << "Loading the model parameters from " << model_parameters_file << std::endl;
+  std::map parameters;
+  NDArray::Load(model_parameters_file, 0, &parameters);
+  // Entries are keyed "aux:NAME" or "arg:NAME"; the prefix selects the target
+  // map and is stripped from the stored name.
+  for (const auto &k : parameters) {
+    if (k.first.substr(0, 4) == "aux:") {
+      auto name = k.first.substr(4, k.first.size() - 4);
+      aux_map[name] = k.second.Copy(global_ctx);
+    }
+    if (k.first.substr(0, 4) == "arg:") {
+      auto name = k.first.substr(4, k.first.size() - 4);
+      args_map[name] = k.second.Copy(global_ctx);
+    }
+  }
+  /* WaitAll is needed when we copy data between GPU and the main memory */
+  NDArray::WaitAll();
+}
+
+
+/*
+ * The following function loads the dictionary file.
+ * The function constructs the word to index map.
+ * This map will be used to represent words in the input sentence by their indices.
+ * Ensure to use the same dictionary file that was used for training the network.
+ * Each line is expected to hold a word followed by its integer index; lines that
+ * do not parse that way are ignored.
+ */
+void Predictor::LoadDictionary(const std::string& input_dictionary) {
+  if (!FileExists(input_dictionary)) {
+    LG << "Dictionary file " << input_dictionary << " does not exist";
+    throw std::runtime_error("Dictionary file does not exist");
+  }
+  LG << "Loading the dictionary file.";
+  std::ifstream fi(input_dictionary.c_str());
+  if (!fi.is_open()) {
+    std::cerr << "Error opening dictionary file " << input_dictionary << std::endl;
+    assert(false);
+  }
+
+  std::string line;
+  std::string word;
+  int index;
+  while (std::getline(fi, line)) {
+    std::istringstream stringline(line);
+    // Store the pair only when both fields were extracted. On a blank or
+    // malformed line `word` keeps its previous value, so an unconditional
+    // store would overwrite the index of the previously read word.
+    if (stringline >> word >> index) {
+      wordToIndex[word] = index;
+    }
+  }
+  fi.close();
+}
+
+
+/*
+ * The function returns the index associated with the word in the dictionary.
+ * If the word is not present, the index representing "" is returned.
+ * If the "" is not present then 0 is returned.
+ * NOTE(review): the fallback key is an empty string literal here; the wording of this
+ * comment suggests a dedicated unknown-word token was meant - confirm against the dictionary.
+ */
+float Predictor::GetIndexForWord(const std::string& word) {
+  if (wordToIndex.find(word) == wordToIndex.end()) {
+    if (wordToIndex.find("") == wordToIndex.end())
+      return 0;
+    else
+      return static_cast(wordToIndex[""]);
+  }
+  return static_cast(wordToIndex[word]);
+}
+
+/*
+ * The function populates the input vector with indices from the dictionary that
+ * correspond to the words in the input string.
+ * Any previous content of the input vector is discarded first.
+ * Words are the tokens obtained by splitting the input on single spaces.
+ * The function returns the number of words in the input line.
+ */
+int Predictor::ConvertToIndexVector(const std::string& input, std::vector *input_vector) {
+  std::istringstream input_string(input);
+  input_vector->clear();
+  const char delimiter = ' ';
+  std::string token;
+  size_t words = 0;
+  // `words` and the vector size grow together from zero, so the second half
+  // of the condition never stops the loop; it ends when the input is consumed.
+  while (std::getline(input_string, token, delimiter) && (words <= input_vector->size())) {
+    input_vector->push_back(GetIndexForWord(token));
+    words++;
+  }
+  return words;
+}
+
+
+/*
+ * The function returns the index at which the given symbol name will appear
+ * in the output vector of NDArrays obtained after running the forward pass on the executor.
+ * Throws std::runtime_error when the network has no output with that name.
+ */
+int Predictor::GetIndexForOutputSymbolName(const std::string& output_symbol_name) {
+  int index = 0;
+  // Bind by const reference: the names are only compared, so copying each
+  // string per iteration is unnecessary.
+  for (const std::string& op : net.ListOutputs()) {
+    if (op == output_symbol_name) {
+      return index;
+    } else {
+      index++;
+    }
+  }
+  throw std::runtime_error("The output symbol name can not be found");
+}
+
+
+/*
+ * The function finds the closest bucket for the given num_words in the input line.
+ * If the exact bucket key exists, function returns that bucket key.
+ * If the matching bucket key does not exist, function looks for the next bucket key
+ * that is greater than given num_words.
+ * If the next larger bucket does not exist, function returns the largest bucket key.
+ */
+int Predictor::GetClosestBucketKey(int num_words) {
+  // lower_bound yields the first bucket key that is not smaller than
+  // num_words; look it up once and fall back to the largest key.
+  auto bucket = executor_buckets.lower_bound(num_words);
+  if (bucket != executor_buckets.end()) {
+    return bucket->first;
+  }
+  return highest_bucket_key;
+}
+
+
+/*
+ * The following function runs the forward pass on the model for the given line
+ * and returns the sigmoid of the network output as the sentiment score.
+ */
+float Predictor::PredictSentimentForOneLine(const std::string& input_line) {
+  /*
+   * Convert the input string to a vector of indices that represent
+   * the words in the input string.
+   * NOTE(review): the constructor argument below sets the initial element count
+   * of the vector, not a fill value; the count is then replaced by the number of
+   * converted words - confirm that this is the intent.
+   */
+  std::vector index_vector(GetIndexForWord(""));
+  int num_words = ConvertToIndexVector(input_line, &index_vector);
+  int bucket_key = GetClosestBucketKey(num_words);
+
+  /*
+   * The index_vector has size equal to num_words. The vector needs to be padded if
+   * the bucket_key is greater than num_words. The vector needs to be trimmed if
+   * the bucket_key is smaller than num_words.
+   */
+  index_vector.resize(bucket_key, GetIndexForWord(""));
+
+  Executor* executor = executor_buckets[bucket_key];
+  executor->arg_dict()["data0"].SyncCopyFromCPU(index_vector.data(), index_vector.size());
+  // NOTE(review): when the line is longer than the largest bucket, num_words
+  // exceeds the trimmed input length written above - confirm the model tolerates that.
+  executor->arg_dict()["data1"] = num_words;
+
+  // Run the forward pass.
+  executor->Forward(false);
+
+  /*
+   * The output is available in executor->outputs. It is a vector of
+   * NDArray. We need to find the index in that vector that
+   * corresponds to the output symbol "sentimentnet0_hybridsequential0_dense0_fwd_output".
+   */
+  const std::string output_symbol_name = "sentimentnet0_hybridsequential0_dense0_fwd_output";
+  int output_index = GetIndexForOutputSymbolName(output_symbol_name);
+  auto arrayout = executor->outputs[output_index].Copy(global_ctx);
+  /*
+   * We will run sigmoid operator to find out the sentiment score between
+   * 0 and 1 where 1 represents positive.
+   */
+  NDArray ret;
+  Operator("sigmoid")(arrayout).Invoke(ret);
+  ret.WaitToRead();
+
+  return ret.At(0, 0);
+}
+
+
+/*
+ * The function predicts the sentiment score for the input review.
+ * The function splits the input review in lines (separated by '.').
+ * It finds sentiment score for each line and computes the average.
+ * NOTE(review): an empty review yields no lines, so the average below is 0/0.
+ */
+float Predictor::PredictSentiment(const std::string& input_review) {
+  std::istringstream input_string(input_review);
+  int num_lines = 0;
+  float sentiment_score = 0.0f;
+
+  // Split the input review in separate lines separated by '.'
+  const char delimiter = '.';
+  std::string line;
+  while (std::getline(input_string, line, delimiter)) {
+    // Predict the sentiment score for each line.
+    float score = PredictSentimentForOneLine(line);
+    LG << "Input Line : [" << line << "] Score : " << score;
+    sentiment_score += score;
+    num_lines++;
+  }
+
+  // Find the average sentiment score.
+  sentiment_score = sentiment_score / num_lines;
+  return sentiment_score;
+}
+
+
+/*
+ * The destructor frees the executors and notifies MXNetEngine to shutdown.
+ */
+Predictor::~Predictor() {
+  // Iterate by reference; only the stored pointer is needed.
+  for (const auto& bucket : this->executor_buckets) {
+    delete bucket.second;
+  }
+  MXNotifyShutdown();
+}
+
+
+/*
+ * The function prints the usage information.
+ */
+void printUsage() {
+  std::cout << "Usage:" << std::endl;
+  std::cout << "sentiment_analysis_rnn " << std::endl
+            << "--input Input movie review. The review can be single line or multiline."
+            << "e.g. \"This movie is the best.\" OR "
+            << "\"This movie is the best. The direction is awesome.\" " << std::endl
+            << "[--gpu] Specify this option if workflow needs to be run in gpu context "
+            << std::endl
+            << "If the review is multiline, the example predicts sentiment score for each line "
+            << "and the final score is the average of scores obtained for each line."
+            << std::endl;
+}
+
+
+/*
+ * The function downloads the model files from s3 bucket.
+ */ +void DownloadFiles(const std::vector model_files) { + std::string wget_command("wget -nc "); + std::string s3_url(DEFAULT_S3_URL); + for (auto &file : model_files) { + std::ostringstream oss; + oss << wget_command << s3_url << file << " -O " << file; + int status = system(oss.str().c_str()); + LG << "Downloading " << file << " with status " << status; + } + return; +} + + +int main(int argc, char** argv) { + std::string model_file_json = "./sentiment_analysis-symbol.json"; + std::string model_file_params ="./sentiment_analysis-0010.params"; + std::string input_dictionary = "./sentiment_token_to_idx.txt"; + std::string input_review = "This movie is the best"; + bool use_gpu = false; + + int index = 1; + while (index < argc) { + if (strcmp("--input", argv[index]) == 0) { + index++; + input_review = (index < argc ? argv[index]:input_review); + } else if (strcmp("--gpu", argv[index]) == 0) { + use_gpu = true; + } else if (strcmp("--help", argv[index]) == 0) { + printUsage(); + return 0; + } + index++; + } + + + /* + * Download the trained RNN model file, param file and dictionary file. + * The dictionary file contains word to index mapping. + * Each line of the dictionary file contains a word and the unique index for that word separated + * by a space. For example: + * snippets 11172 + * This dictionary file is created when the RNN model was trained with a particular dataset. + * Hence the dictionary file is specific to the dataset with which model was trained. + */ + std::vector files; + files.push_back(model_file_json); + files.push_back(model_file_params); + files.push_back(input_dictionary); + + DownloadFiles(files); + + std::vector buckets(DEFAULT_BUCKET_KEYS, + DEFAULT_BUCKET_KEYS + sizeof(DEFAULT_BUCKET_KEYS) / sizeof(int)); + + try { + // Initialize the predictor object + Predictor predict(model_file_json, model_file_params, input_dictionary, buckets, use_gpu); + + // Run the forward pass to predict the sentiment score for the given review. 
+ float sentiment_score = predict.PredictSentiment(input_review); + LG << "The sentiment score between 0 and 1, (1 being positive)=" << sentiment_score; + } catch (std::runtime_error &error) { + LG << MXGetLastError(); + LG << "Execution failed with ERROR: " << error.what(); + return 1; + } catch (...) { + /* + * If underlying MXNet code has thrown an exception the error message is + * accessible through MXGetLastError() function. + */ + LG << "Execution failed with following MXNet error"; + LG << MXGetLastError(); + return 1; + } + return 0; +} diff --git a/cpp-package/example/inference/unit_test_imagenet_inference.sh b/cpp-package/example/inference/unit_test_imagenet_inference.sh new file mode 100755 index 000000000000..4d89ba6fb075 --- /dev/null +++ b/cpp-package/example/inference/unit_test_imagenet_inference.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex +# create ./model directory if not existed +if [ ! -d model ]; then + mkdir -p model +fi +# create ./data directory if not existed +if [ ! -d data ]; then + mkdir -p data +fi +# Downloading the data and model if not existed +model_file=./model/Inception-BN-symbol.json +params_file=./model/Inception-BN-0126.params +if [ ! 
-f ${model_file} ] || [ ! -f ${params_file} ]; then + wget -nc http://data.mxnet.io/models/imagenet/inception-bn.tar.gz + tar -xvzf inception-bn.tar.gz -C model +fi +cd model +wget -nc https://raw.githubusercontent.com/dmlc/gluon-cv/master/gluoncv/model_zoo/quantized/resnet50_v1_int8-symbol.json +cd ../data +wget -nc http://data.mxnet.io/data/val_256_q90.rec +cd .. + +# Running inference on imagenet. +if [ "$(uname)" == "Darwin" ]; then + echo ">>> INFO: FP32 real data" + DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:../../../build ./imagenet_inference --symbol_file "./model/Inception-BN-symbol.json" --params_file "./model/Inception-BN-0126.params" --dataset "./data/val_256_q90.rec" --rgb_mean "123.68 116.779 103.939" --batch_size 1 --num_skipped_batches 50 --num_inference_batches 500 + + echo ">>> INFO: FP32 dummy data" + DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:../../../build ./imagenet_inference --symbol_file "./model/Inception-BN-symbol.json" --batch_size 1 --num_inference_batches 500 --benchmark +else + echo ">>> INFO: FP32 real data" + LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../../build ./imagenet_inference --symbol_file "./model/Inception-BN-symbol.json" --params_file "./model/Inception-BN-0126.params" --dataset "./data/val_256_q90.rec" --rgb_mean "123.68 116.779 103.939" --batch_size 1 --num_skipped_batches 50 --num_inference_batches 500 + + echo ">>> INFO: FP32 dummy data" + LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../../build ./imagenet_inference --symbol_file "./model/Inception-BN-symbol.json" --batch_size 1 --num_inference_batches 500 --benchmark + + lib_name=$(ls -a ../../../build | grep -oE 'onednn' | tail -1) + if [[ -n ${lib_name} ]] && [[ 'onednn' =~ ${lib_name} ]]; then + echo ">>> INFO: INT8 dummy data" + LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../../build ./imagenet_inference --symbol_file "./model/resnet50_v1_int8-symbol.json" --batch_size 1 --num_inference_batches 500 --benchmark + else + echo "Skipped INT8 test because onednn was not found which is required 
for running inference with quantized models." + fi +fi diff --git a/cpp-package/example/inference/unit_test_sentiment_analysis_rnn.sh b/cpp-package/example/inference/unit_test_sentiment_analysis_rnn.sh new file mode 100755 index 000000000000..1f123c43b7b6 --- /dev/null +++ b/cpp-package/example/inference/unit_test_sentiment_analysis_rnn.sh @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +function compare_range() { + perl -e "{if($1>$2 && $1<=$3){print 1} else {print 0}}" +} + +set -e # exit on the first error +export EXE_NAME="sentiment_analysis_rnn" + +# Running the example with a movie review. +if [ "$(uname)" == "Darwin" ]; then + DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:../../../build ./${EXE_NAME} --input "This movie is the best." 2&> ${EXE_NAME}.log +else + LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../../build ./${EXE_NAME} --input "This movie is the best." 
2&> ${EXE_NAME}.log +fi +result=`grep "The sentiment score between 0 and 1.*\=" ${EXE_NAME}.log | cut -d '=' -f2` +lower_bound=0.8 +upper_bound=0.99 +if [ $(compare_range $result $lower_bound $upper_bound) == 1 ]; +then + echo "PASS: ${EXE_NAME} correctly predicted the sentiment with score = $result" + exit 0 +else + echo "FAIL: ${EXE_NAME} FAILED to predict the sentiment with score = $result" + exit 1 +fi \ No newline at end of file diff --git a/cpp-package/example/lenet.cpp b/cpp-package/example/lenet.cpp new file mode 100644 index 000000000000..8f45cead9075 --- /dev/null +++ b/cpp-package/example/lenet.cpp @@ -0,0 +1,267 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + */ +#include +#include +#include +#include +#include +#include "mxnet-cpp/MxNetCpp.h" +#include "utils.h" + +using namespace mxnet::cpp; + +class Lenet { + public: + Lenet() + : ctx_cpu(Context(DeviceType::kCPU, 0)), +#if !MXNET_USE_CUDA + ctx_dev(Context(DeviceType::kCPU, 0)) +#else + ctx_dev(Context(DeviceType::kGPU, 0)) +#endif + {} + + void Run(int max_epoch) { + /* + * LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick Haffner. + * "Gradient-based learning applied to document recognition." 
+ * Proceedings of the IEEE (1998) + * */ + + /*define the symbolic net*/ + Symbol data = Symbol::Variable("data"); + Symbol data_label = Symbol::Variable("data_label"); + Symbol conv1_w("conv1_w"), conv1_b("conv1_b"); + Symbol conv2_w("conv2_w"), conv2_b("conv2_b"); + Symbol conv3_w("conv3_w"), conv3_b("conv3_b"); + Symbol fc1_w("fc1_w"), fc1_b("fc1_b"); + Symbol fc2_w("fc2_w"), fc2_b("fc2_b"); + + Symbol conv1 = + Convolution("conv1", data, conv1_w, conv1_b, Shape(5, 5), 20); + Symbol tanh1 = Activation("tanh1", conv1, ActivationActType::kTanh); + Symbol pool1 = Pooling("pool1", tanh1, Shape(2, 2), PoolingPoolType::kMax, + false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); + + Symbol conv2 = Convolution("conv2", pool1, conv2_w, conv2_b, + Shape(5, 5), 50); + Symbol tanh2 = Activation("tanh2", conv2, ActivationActType::kTanh); + Symbol pool2 = Pooling("pool2", tanh2, Shape(2, 2), PoolingPoolType::kMax, + false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); + + Symbol conv3 = Convolution("conv3", pool2, conv3_w, conv3_b, + Shape(2, 2), 500); + Symbol tanh3 = Activation("tanh3", conv3, ActivationActType::kTanh); + Symbol pool3 = Pooling("pool3", tanh3, Shape(2, 2), PoolingPoolType::kMax, + false, false, PoolingPoolingConvention::kValid, Shape(1, 1)); + + Symbol flatten = Flatten("flatten", pool3); + Symbol fc1 = FullyConnected("fc1", flatten, fc1_w, fc1_b, 500); + Symbol tanh4 = Activation("tanh4", fc1, ActivationActType::kTanh); + Symbol fc2 = FullyConnected("fc2", tanh4, fc2_w, fc2_b, 10); + + Symbol lenet = SoftmaxOutput("softmax", fc2, data_label); + + for (auto s : lenet.ListArguments()) { + LG << s; + } + + /*setup basic configs*/ + int val_fold = 1; + int W = 28; + int H = 28; + int batch_size = 42; + float learning_rate = 1e-4; + float weight_decay = 1e-4; + + /*prepare the data*/ + std::vector data_vec, label_vec; + size_t data_count = GetData(&data_vec, &label_vec); + const float *dptr = data_vec.data(); + const float *lptr = 
label_vec.data(); + NDArray data_array = NDArray(Shape(data_count, 1, W, H), ctx_cpu, + false); // store in main memory, and copy to + // device memory while training + NDArray label_array = + NDArray(Shape(data_count), ctx_cpu, + false); // it's also ok if just store them all in device memory + data_array.SyncCopyFromCPU(dptr, data_count * W * H); + label_array.SyncCopyFromCPU(lptr, data_count); + data_array.WaitToRead(); + label_array.WaitToRead(); + + size_t train_num = data_count * (1 - val_fold / 10.0); + train_data = data_array.Slice(0, train_num); + train_label = label_array.Slice(0, train_num); + val_data = data_array.Slice(train_num, data_count); + val_label = label_array.Slice(train_num, data_count); + + LG << "here read fin"; + + /*init some of the args*/ + // map args_map; + args_map["data"] = data_array.Slice(0, batch_size).Copy(ctx_dev); + args_map["data_label"] = label_array.Slice(0, batch_size).Copy(ctx_dev); + NDArray::WaitAll(); + + LG << "here slice fin"; + /* + * we can also feed in some of the args other than the input all by + * ourselves, + * fc2-w , fc1-b for example: + * */ + // args_map["fc2_w"] = + // NDArray(mshadow::Shape2(500, 4 * 4 * 50), ctx_dev, false); + // NDArray::SampleGaussian(0, 1, &args_map["fc2_w"]); + // args_map["fc1_b"] = NDArray(mshadow::Shape1(10), ctx_dev, false); + // args_map["fc1_b"] = 0; + + lenet.InferArgsMap(ctx_dev, &args_map, args_map); + Optimizer* opt = OptimizerRegistry::Find("ccsgd"); + opt->SetParam("momentum", 0.9) + ->SetParam("rescale_grad", 1.0) + ->SetParam("clip_gradient", 10) + ->SetParam("lr", learning_rate) + ->SetParam("wd", weight_decay); + + Executor *exe = lenet.SimpleBind(ctx_dev, args_map); + auto arg_names = lenet.ListArguments(); + + for (int ITER = 0; ITER < max_epoch; ++ITER) { + size_t start_index = 0; + while (start_index < train_num) { + if (start_index + batch_size > train_num) { + start_index = train_num - batch_size; + } + args_map["data"] = + train_data.Slice(start_index, 
start_index + batch_size) + .Copy(ctx_dev); + args_map["data_label"] = + train_label.Slice(start_index, start_index + batch_size) + .Copy(ctx_dev); + start_index += batch_size; + NDArray::WaitAll(); + + exe->Forward(true); + exe->Backward(); + // Update parameters + for (size_t i = 0; i < arg_names.size(); ++i) { + if (arg_names[i] == "data" || arg_names[i] == "data_label") continue; + opt->Update(i, exe->arg_arrays[i], exe->grad_arrays[i]); + } + } + + LG << "Iter " << ITER + << ", accuracy: " << ValAccuracy(batch_size * 10, lenet); + } + delete exe; + delete opt; + } + + private: + Context ctx_cpu; + Context ctx_dev; + std::map args_map; + NDArray train_data; + NDArray train_label; + NDArray val_data; + NDArray val_label; + + size_t GetData(std::vector *data, std::vector *label) { + const char *train_data_path = "./data/mnist_data/mnist_train.csv"; + std::ifstream inf(train_data_path); + std::string line; + inf >> line; // ignore the header + size_t _N = 0; + while (inf >> line) { + for (auto &c : line) c = (c == ',') ? 
' ' : c; + std::stringstream ss; + ss << line; + float _data; + ss >> _data; + label->push_back(_data); + while (ss >> _data) data->push_back(_data / 256.0); + _N++; + } + inf.close(); + return _N; + } + + float ValAccuracy(int batch_size, Symbol lenet) { + size_t val_num = val_data.GetShape()[0]; + + size_t correct_count = 0; + size_t all_count = 0; + + size_t start_index = 0; + while (start_index < val_num) { + if (start_index + batch_size > val_num) { + start_index = val_num - batch_size; + } + args_map["data"] = + val_data.Slice(start_index, start_index + batch_size).Copy(ctx_dev); + args_map["data_label"] = + val_label.Slice(start_index, start_index + batch_size).Copy(ctx_dev); + start_index += batch_size; + NDArray::WaitAll(); + + Executor *exe = lenet.SimpleBind(ctx_dev, args_map); + exe->Forward(false); + + const auto &out = exe->outputs; + NDArray out_cpu = out[0].Copy(ctx_cpu); + NDArray label_cpu = + val_label.Slice(start_index - batch_size, start_index).Copy(ctx_cpu); + + NDArray::WaitAll(); + + const mx_float *dptr_out = out_cpu.GetData(); + const mx_float *dptr_label = label_cpu.GetData(); + for (int i = 0; i < batch_size; ++i) { + float label = dptr_label[i]; + int cat_num = out_cpu.GetShape()[1]; + float p_label = 0, max_p = dptr_out[i * cat_num]; + for (int j = 0; j < cat_num; ++j) { + float p = dptr_out[i * cat_num + j]; + if (max_p < p) { + p_label = j; + max_p = p; + } + } + if (label == p_label) correct_count++; + } + all_count += batch_size; + + delete exe; + } + return correct_count * 1.0 / all_count; + } +}; + +int main(int argc, char const *argv[]) { + TRY + Lenet lenet; + lenet.Run(argc > 1 ? 
strtol(argv[1], nullptr, 10) : 100000); + MXNotifyShutdown(); + CATCH + return 0; +} diff --git a/cpp-package/example/lenet_with_mxdataiter.cpp b/cpp-package/example/lenet_with_mxdataiter.cpp new file mode 100644 index 000000000000..c6b1fd90f846 --- /dev/null +++ b/cpp-package/example/lenet_with_mxdataiter.cpp @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + */ +#include +#include +#include +#include +#include +#include +#include "utils.h" +#include "mxnet-cpp/MxNetCpp.h" + +using namespace mxnet::cpp; + +Symbol LenetSymbol() { + /* + * LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick Haffner. + * "Gradient-based learning applied to document recognition." 
+ * Proceedings of the IEEE (1998) + * */ + + /*define the symbolic net*/ + Symbol data = Symbol::Variable("data"); + Symbol data_label = Symbol::Variable("data_label"); + Symbol conv1_w("conv1_w"), conv1_b("conv1_b"); + Symbol conv2_w("conv2_w"), conv2_b("conv2_b"); + Symbol conv3_w("conv3_w"), conv3_b("conv3_b"); + Symbol fc1_w("fc1_w"), fc1_b("fc1_b"); + Symbol fc2_w("fc2_w"), fc2_b("fc2_b"); + + Symbol conv1 = Convolution("conv1", data, conv1_w, conv1_b, Shape(5, 5), 20); + Symbol tanh1 = Activation("tanh1", conv1, ActivationActType::kTanh); + Symbol pool1 = Pooling("pool1", tanh1, Shape(2, 2), PoolingPoolType::kMax, + false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); + + Symbol conv2 = Convolution("conv2", pool1, conv2_w, conv2_b, Shape(5, 5), 50); + Symbol tanh2 = Activation("tanh2", conv2, ActivationActType::kTanh); + Symbol pool2 = Pooling("pool2", tanh2, Shape(2, 2), PoolingPoolType::kMax, + false, false, PoolingPoolingConvention::kValid, Shape(2, 2)); + + Symbol flatten = Flatten("flatten", pool2); + Symbol fc1 = FullyConnected("fc1", flatten, fc1_w, fc1_b, 500); + Symbol tanh3 = Activation("tanh3", fc1, ActivationActType::kTanh); + Symbol fc2 = FullyConnected("fc2", tanh3, fc2_w, fc2_b, 10); + + Symbol lenet = SoftmaxOutput("softmax", fc2, data_label); + + return lenet; +} + +NDArray ResizeInput(NDArray data, const Shape new_shape) { + NDArray pic = data.Reshape(Shape(0, 1, 28, 28)); + NDArray output; + Operator("_contrib_BilinearResize2D") + .SetParam("height", new_shape[2]) + .SetParam("width", new_shape[3]) + (pic).Invoke(output); + return output; +} + +int main(int argc, char const *argv[]) { + /*setup basic configs*/ + int W = 28; + int H = 28; + int batch_size = 128; + int max_epoch = argc > 1 ? 
strtol(argv[1], nullptr, 10) : 100; + float learning_rate = 1e-4; + float weight_decay = 1e-4; + + auto dev_ctx = Context::cpu(); + int num_gpu; + MXGetGPUCount(&num_gpu); +#if MXNET_USE_CUDA + if (num_gpu > 0) { + dev_ctx = Context::gpu(); + } +#endif + + TRY + auto lenet = LenetSymbol(); + std::map args_map; + + const Shape data_shape = Shape(batch_size, 1, H, W), + label_shape = Shape(batch_size); + args_map["data"] = NDArray(data_shape, dev_ctx); + args_map["data_label"] = NDArray(label_shape, dev_ctx); + lenet.InferArgsMap(dev_ctx, &args_map, args_map); + + args_map["fc1_w"] = NDArray(Shape(500, 4 * 4 * 50), dev_ctx); + NDArray::SampleGaussian(0, 1, &args_map["fc1_w"]); + args_map["fc2_b"] = NDArray(Shape(10), dev_ctx); + args_map["fc2_b"] = 0; + + std::vector data_files = { "./data/mnist_data/train-images-idx3-ubyte", + "./data/mnist_data/train-labels-idx1-ubyte", + "./data/mnist_data/t10k-images-idx3-ubyte", + "./data/mnist_data/t10k-labels-idx1-ubyte" + }; + + auto train_iter = MXDataIter("MNISTIter"); + if (!setDataIter(&train_iter, "Train", data_files, batch_size)) { + return 1; + } + + auto val_iter = MXDataIter("MNISTIter"); + if (!setDataIter(&val_iter, "Label", data_files, batch_size)) { + return 1; + } + + Optimizer* opt = OptimizerRegistry::Find("sgd"); + opt->SetParam("momentum", 0.9) + ->SetParam("rescale_grad", 1.0) + ->SetParam("clip_gradient", 10) + ->SetParam("lr", learning_rate) + ->SetParam("wd", weight_decay); + + + auto *exec = lenet.SimpleBind(dev_ctx, args_map); + auto arg_names = lenet.ListArguments(); + + // Create metrics + Accuracy train_acc, val_acc; + + for (int iter = 0; iter < max_epoch; ++iter) { + int samples = 0; + train_iter.Reset(); + train_acc.Reset(); + + auto tic = std::chrono::system_clock::now(); + + while (train_iter.Next()) { + samples += batch_size; + auto data_batch = train_iter.GetDataBatch(); + + ResizeInput(data_batch.data, data_shape).CopyTo(&args_map["data"]); + data_batch.label.CopyTo(&args_map["data_label"]); 
+ NDArray::WaitAll(); + + // Compute gradients + exec->Forward(true); + exec->Backward(); + + // Update parameters + for (size_t i = 0; i < arg_names.size(); ++i) { + if (arg_names[i] == "data" || arg_names[i] == "data_label") continue; + opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]); + } + + // Update metric + train_acc.Update(data_batch.label, exec->outputs[0]); + } + + // one epoch of training is finished + auto toc = std::chrono::system_clock::now(); + float duration = std::chrono::duration_cast + (toc - tic).count() / 1000.0; + LG << "Epoch[" << iter << "] " << samples / duration \ + << " samples/sec " << "Train-Accuracy=" << train_acc.Get();; + + val_iter.Reset(); + val_acc.Reset(); + + Accuracy acu; + val_iter.Reset(); + while (val_iter.Next()) { + auto data_batch = val_iter.GetDataBatch(); + ResizeInput(data_batch.data, data_shape).CopyTo(&args_map["data"]); + data_batch.label.CopyTo(&args_map["data_label"]); + NDArray::WaitAll(); + + // Only forward pass is enough as no gradient is needed when evaluating + exec->Forward(false); + NDArray::WaitAll(); + acu.Update(data_batch.label, exec->outputs[0]); + val_acc.Update(data_batch.label, exec->outputs[0]); + } + LG << "Epoch[" << iter << "] Val-Accuracy=" << val_acc.Get(); + } + + delete exec; + delete opt; + MXNotifyShutdown(); + CATCH + return 0; +} diff --git a/cpp-package/example/mlp.cpp b/cpp-package/example/mlp.cpp new file mode 100644 index 000000000000..970dad74e727 --- /dev/null +++ b/cpp-package/example/mlp.cpp @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + */ + +#include +#include +#include +#include "mxnet-cpp/MxNetCpp.h" +#include "utils.h" + +using namespace mxnet::cpp; + +/* + * In this example, + * we make by hand some data in 10 classes with some pattern + * and try to use MLP to recognize the pattern. + */ + +void OutputAccuracy(mx_float* pred, mx_float* target) { + int right = 0; + for (int i = 0; i < 128; ++i) { + float mx_p = pred[i * 10 + 0]; + float p_y = 0; + for (int j = 0; j < 10; ++j) { + if (pred[i * 10 + j] > mx_p) { + mx_p = pred[i * 10 + j]; + p_y = j; + } + } + if (p_y == target[i]) right++; + } + std::cout << "Accuracy: " << right / 128.0 << std::endl; +} + +void MLP(int max_epoch) { + auto sym_x = Symbol::Variable("X"); + auto sym_label = Symbol::Variable("label"); + + const int nLayers = 2; + std::vector layerSizes({512, 10}); + std::vector weights(nLayers); + std::vector biases(nLayers); + std::vector outputs(nLayers); + + Symbol null_sym; + for (int i = 0; i < nLayers; i++) { + std::string istr = std::to_string(i); + weights[i] = Symbol::Variable(std::string("w") + istr); + biases[i] = Symbol::Variable(std::string("b") + istr); + Symbol fc = FullyConnected(std::string("fc") + istr, + i == 0? 
sym_x : outputs[i-1], + weights[i], biases[i], layerSizes[i]); + outputs[i] = LeakyReLU(std::string("act") + istr, fc, null_sym, LeakyReLUActType::kLeaky); + } + auto sym_out = SoftmaxOutput("softmax", outputs[nLayers - 1], sym_label); + + Context ctx_dev(DeviceType::kCPU, 0); + + NDArray array_x(Shape(128, 28), ctx_dev, false); + NDArray array_y(Shape(128), ctx_dev, false); + + mx_float* aptr_x = new mx_float[128 * 28]; + mx_float* aptr_y = new mx_float[128]; + + // we make the data by hand, in 10 classes, with some pattern + for (int i = 0; i < 128; i++) { + for (int j = 0; j < 28; j++) { + aptr_x[i * 28 + j] = i % 10 * 1.0f; + } + aptr_y[i] = i % 10; + } + array_x.SyncCopyFromCPU(aptr_x, 128 * 28); + array_x.WaitToRead(); + array_y.SyncCopyFromCPU(aptr_y, 128); + array_y.WaitToRead(); + + // init the parameters + NDArray array_w_1(Shape(512, 28), ctx_dev, false); + NDArray array_b_1(Shape(512), ctx_dev, false); + NDArray array_w_2(Shape(10, 512), ctx_dev, false); + NDArray array_b_2(Shape(10), ctx_dev, false); + + // the parameters should be initialized in some kind of distribution, + // so it learns fast + // but here just give a const value by hand + array_w_1 = 0.5f; + array_b_1 = 0.0f; + array_w_2 = 0.5f; + array_b_2 = 0.0f; + + // the grads + NDArray array_w_1_g(Shape(512, 28), ctx_dev, false); + NDArray array_b_1_g(Shape(512), ctx_dev, false); + NDArray array_w_2_g(Shape(10, 512), ctx_dev, false); + NDArray array_b_2_g(Shape(10), ctx_dev, false); + + // Bind the symolic network with the ndarray + // all the input args + std::vector in_args; + in_args.push_back(array_x); + in_args.push_back(array_w_1); + in_args.push_back(array_b_1); + in_args.push_back(array_w_2); + in_args.push_back(array_b_2); + in_args.push_back(array_y); + // all the grads + std::vector arg_grad_store; + arg_grad_store.push_back(NDArray()); // we don't need the grad of the input + arg_grad_store.push_back(array_w_1_g); + arg_grad_store.push_back(array_b_1_g); + 
arg_grad_store.push_back(array_w_2_g); + arg_grad_store.push_back(array_b_2_g); + arg_grad_store.push_back( + NDArray()); // neither do we need the grad of the loss + // how to handle the grad + std::vector grad_req_type; + grad_req_type.push_back(kNullOp); + grad_req_type.push_back(kWriteTo); + grad_req_type.push_back(kWriteTo); + grad_req_type.push_back(kWriteTo); + grad_req_type.push_back(kWriteTo); + grad_req_type.push_back(kNullOp); + std::vector aux_states; + + std::cout << "make the Executor" << std::endl; + Executor* exe = new Executor(sym_out, ctx_dev, in_args, arg_grad_store, + grad_req_type, aux_states); + + std::cout << "Training" << std::endl; + mx_float learning_rate = 0.0001; + for (int epoch_num = 0; epoch_num < max_epoch; ++epoch_num) { + exe->Forward(true); + // print accuracy every 100 epoch + if (epoch_num % 100 == 0) { + std::cout << "epoch " << epoch_num << std::endl; + std::vector& out = exe->outputs; + float* cptr = new float[128 * 10]; + out[0].SyncCopyToCPU(cptr, 128 * 10); + NDArray::WaitAll(); + OutputAccuracy(cptr, aptr_y); + delete[] cptr; + } + + // update the parameters + exe->Backward(); + for (int i = 1; i < 5; ++i) { + in_args[i] -= arg_grad_store[i] * learning_rate; + } + NDArray::WaitAll(); + } + + delete exe; + delete[] aptr_x; + delete[] aptr_y; +} + +int main(int argc, char** argv) { + int max_epoch = argc > 1 ? strtol(argv[1], nullptr, 10) : 15000; + TRY + MLP(max_epoch); + MXNotifyShutdown(); + CATCH + return 0; +} diff --git a/cpp-package/example/mlp_cpu.cpp b/cpp-package/example/mlp_cpu.cpp new file mode 100644 index 000000000000..7ea6946dd8c2 --- /dev/null +++ b/cpp-package/example/mlp_cpu.cpp @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Xin Li yakumolx@gmail.com + */ +#include +#include "utils.h" +#include "mxnet-cpp/MxNetCpp.h" + +using namespace mxnet::cpp; + +Symbol mlp(const std::vector &layers) { + auto x = Symbol::Variable("X"); + auto label = Symbol::Variable("label"); + + std::vector weights(layers.size()); + std::vector biases(layers.size()); + std::vector outputs(layers.size()); + + for (size_t i = 0; i < layers.size(); ++i) { + weights[i] = Symbol::Variable("w" + std::to_string(i)); + biases[i] = Symbol::Variable("b" + std::to_string(i)); + Symbol fc = FullyConnected( + i == 0? x : outputs[i-1], // data + weights[i], + biases[i], + layers[i]); + outputs[i] = i == layers.size()-1 ? 
fc : Activation(fc, ActivationActType::kRelu);
+  }
+
+  return SoftmaxOutput(outputs.back(), label);
+}
+
+/*
+ * Train the network built by mlp() for the layer sizes {128, 64, 10} on the
+ * MNIST files listed below, using the CPU context and the "sgd" optimizer.
+ * After every epoch the validation iterator is replayed to measure accuracy,
+ * and the training throughput (samples/sec) plus accuracy are logged.
+ *
+ * Returns 1 when setDataIter() reports failure for either iterator, 0 after
+ * the training loop has completed.
+ */
+int main(int argc, char** argv) {
+  const int image_size = 28;
+  const std::vector<int> layers{128, 64, 10};
+  const int batch_size = 100;
+  const int max_epoch = 10;
+  const float learning_rate = 0.1;
+  const float weight_decay = 1e-2;
+
+  // Training images/labels followed by the t10k images/labels.
+  std::vector<std::string> data_files = { "./data/mnist_data/train-images-idx3-ubyte",
+                                          "./data/mnist_data/train-labels-idx1-ubyte",
+                                          "./data/mnist_data/t10k-images-idx3-ubyte",
+                                          "./data/mnist_data/t10k-labels-idx1-ubyte"
+                                        };
+
+  auto train_iter = MXDataIter("MNISTIter");
+  if (!setDataIter(&train_iter, "Train", data_files, batch_size)) {
+    return 1;
+  }
+
+  // NOTE(review): the "Label" mode string is interpreted by setDataIter(), which is
+  // defined elsewhere; presumably it selects the t10k files - confirm against utils.h.
+  auto val_iter = MXDataIter("MNISTIter");
+  if (!setDataIter(&val_iter, "Label", data_files, batch_size)) {
+    return 1;
+  }
+
+  TRY
+  auto net = mlp(layers);
+
+  Context ctx = Context::cpu();  // Use CPU for training
+
+  // Only the input and label shapes are given explicitly.
+  std::map<std::string, NDArray> args;
+  args["X"] = NDArray(Shape(batch_size, image_size*image_size), ctx);
+  args["label"] = NDArray(Shape(batch_size), ctx);
+  // Let MXNet infer the shapes of the other parameters such as weights
+  net.InferArgsMap(ctx, &args, args);
+
+  // Initialize all parameters with uniform distribution U(-0.01, 0.01)
+  auto initializer = Uniform(0.01);
+  for (auto& arg : args) {
+    // arg.first is parameter name, and arg.second is the value
+    initializer(arg.first, &arg.second);
+  }
+
+  // Create sgd optimizer; gradients are rescaled by 1/batch_size
+  Optimizer* opt = OptimizerRegistry::Find("sgd");
+  opt->SetParam("rescale_grad", 1.0/batch_size)
+     ->SetParam("lr", learning_rate)
+     ->SetParam("wd", weight_decay);
+
+  // Create executor by binding parameters to the model
+  auto *exec = net.SimpleBind(ctx, args);
+  auto arg_names = net.ListArguments();
+
+  // Start training
+  for (int iter = 0; iter < max_epoch; ++iter) {
+    int samples = 0;
+    train_iter.Reset();
+
+    // steady_clock is monotonic, so the measured interval cannot be distorted
+    // by wall-clock adjustments while the epoch is running.
+    auto tic = std::chrono::steady_clock::now();
+    while (train_iter.Next()) {
+      samples += batch_size;
+      auto data_batch = train_iter.GetDataBatch();
+      // Set data and label
+      data_batch.data.CopyTo(&args["X"]);
+      data_batch.label.CopyTo(&args["label"]);
+
+      // Compute gradients
+      exec->Forward(true);
+      exec->Backward();
+      // Update parameters; the input and label arrays are not trainable
+      for (size_t i = 0; i < arg_names.size(); ++i) {
+        if (arg_names[i] == "X" || arg_names[i] == "label") continue;
+        opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
+      }
+    }
+    auto toc = std::chrono::steady_clock::now();
+
+    Accuracy acc;
+    val_iter.Reset();
+    while (val_iter.Next()) {
+      auto data_batch = val_iter.GetDataBatch();
+      data_batch.data.CopyTo(&args["X"]);
+      data_batch.label.CopyTo(&args["label"]);
+      // Forward pass is enough as no gradient is needed when evaluating
+      exec->Forward(false);
+      acc.Update(data_batch.label, exec->outputs[0]);
+    }
+    // Elapsed training time of this epoch in seconds (validation is not timed)
+    float duration = std::chrono::duration_cast<std::chrono::milliseconds>
+                     (toc - tic).count() / 1000.0;
+    LG << "Epoch: " << iter << " " << samples/duration << " samples/sec Accuracy: " << acc.Get();
+  }
+
+  // NOTE(review): exec and opt are released by hand; if TRY/CATCH expand to a
+  // try/catch that returns early, these deletes are skipped on the error path - confirm.
+  delete exec;
+  delete opt;
+  MXNotifyShutdown();
+  CATCH
+  return 0;
+}
diff --git a/cpp-package/example/mlp_csv.cpp b/cpp-package/example/mlp_csv.cpp
new file mode 100644
index 000000000000..8db6638a90d3
--- /dev/null
+++ b/cpp-package/example/mlp_csv.cpp
@@ -0,0 +1,276 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Example: mlp_csv
+ * Description:
+ * The following example demonstrates how to use CSVIter. This example creates
+ * mlp (multi-layer perceptron) model and trains the MNIST data which is in
+ * CSV format.
+ */
+#include <chrono>
+#include <cstdlib>
+#include <string>
+#include <vector>
+#include "utils.h"
+#include "mxnet-cpp/MxNetCpp.h"
+
+using namespace mxnet::cpp;
+
+/*
+ * Implementing the mlp symbol with given hidden units configuration.
+ *
+ * One FullyConnected layer is created per entry of hidden_units, fed by the
+ * "data" variable (first layer) or by the previous layer's output. Layer i uses
+ * the variables "w<i>" and "b<i>". Every layer except the last is followed by a
+ * ReLU activation; the last layer's output goes into SoftmaxOutput together
+ * with the "label" variable.
+ *
+ * hidden_units must not be empty: outputs.back() is called unconditionally.
+ */
+Symbol mlp(const std::vector<int> &hidden_units) {
+  auto data = Symbol::Variable("data");
+  auto label = Symbol::Variable("label");
+
+  std::vector<Symbol> weights(hidden_units.size());
+  std::vector<Symbol> biases(hidden_units.size());
+  std::vector<Symbol> outputs(hidden_units.size());
+
+  for (size_t i = 0; i < hidden_units.size(); ++i) {
+    weights[i] = Symbol::Variable("w" + std::to_string(i));
+    biases[i] = Symbol::Variable("b" + std::to_string(i));
+    Symbol fc = FullyConnected(
+      i == 0? data : outputs[i-1],  // data
+      weights[i],
+      biases[i],
+      hidden_units[i]);
+    // No activation after the final layer; its raw output feeds the softmax.
+    outputs[i] = i == hidden_units.size()-1 ? fc : Activation(fc, ActivationActType::kRelu);
+  }
+  return SoftmaxOutput(outputs.back(), label);
+}
+
+/*
+ * Convert the input string of number of hidden units into the vector of integers.
+ *
+ * The string is scanned left to right with strtol (base 10) and every number
+ * that can be parsed is appended in order, e.g. "128 64 64" -> {128, 64, 64}.
+ * A character that cannot start a number (a comma, trailing whitespace, ...)
+ * is skipped, so the scan always advances and terminates. An input that holds
+ * no number at all yields an empty vector.
+ */
+std::vector<int> getLayers(const std::string& hidden_units_string) {
+  std::vector<int> hidden_units;
+  const char *cursor = hidden_units_string.c_str();
+  while (*cursor != '\0') {
+    char *next = nullptr;
+    const auto num_unit = strtol(cursor, &next, 10);
+    if (next == cursor) {
+      // strtol converted nothing and left the end pointer where it started;
+      // step over one character, otherwise the loop would never terminate.
+      ++cursor;
+      continue;
+    }
+    hidden_units.push_back(static_cast<int>(num_unit));
+    cursor = next;
+  }
+  return hidden_units;
+}
+
+void printUsage() {
+  std::cout << "Usage:" << std::endl;
+  std::cout << "mlp_csv --train mnist_training_set.csv --test mnist_test_set.csv --epochs 10 "
+  << "--batch_size 100 --hidden_units \"128 64 64\" --gpu" << std::endl;
+  std::cout << "The example uses mnist data in CSV format.
The MNIST data in CSV format assumes " + << "the column 0 to be label and the rest 784 column to be data." << std::endl; + std::cout << "By default, the example uses 'cpu' context. If '--gpu' is specified, " + << "program uses 'gpu' context." <