Skip to content

Commit

Permalink
Change defaults and improve CMake export
Browse files Browse the repository at this point in the history
  • Loading branch information
jerinphilip authored Oct 28, 2023
1 parent ed4bfa4 commit 4322e41
Show file tree
Hide file tree
Showing 16 changed files with 226 additions and 104 deletions.
18 changes: 15 additions & 3 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install -y build-essential cmake
python3 -m pip install black isort
python3 -m pip install cmake-format
sudo apt-get install -y shfmt
Expand Down Expand Up @@ -71,7 +72,7 @@ jobs:
with:
submodules: recursive

- name: Install dependencies
- name: Setup
run: |
bash scripts/ci/${{ matrix.name }}/01-setup.sh
Expand All @@ -87,6 +88,15 @@ jobs:
run:
bash scripts/ci/${{ matrix.name }}/04-package.sh

- uses: actions/upload-artifact@v2
if: ${{ failure() }}
with:
name: slimt
path: |
${{ github.workspace }}/slimt-coredump
build-wheels:
strategy:
matrix:
Expand Down Expand Up @@ -199,7 +209,7 @@ jobs:
PYTHON_LOCAL_VERSION_IDENTIFIER=${{ env.PYTHON_LOCAL_VERSION_IDENTIFIER }}
CCACHE_DIR=/host/${{ env.ccache_dir }}
CCACHE_BASEDIR=/host/${{ env.ccache_basedir }}
CMAKE_ARGS="-DUSE_PYBIND11_SOURCE=ON -DBLA_VENDOR=OpenBLAS"
CMAKE_ARGS="-DUSE_PYBIND11_SOURCE=ON -DBLA_VENDOR=OpenBLAS -DUSE_BUILTIN_SENTENCEPIECE=ON -DWITH_GEMMOLOGY=OFF -DWITH_INTGEMM=ON -DUSE_SSE2=ON"

CIBW_ENVIRONMENT_MACOS:
USE_CCACHE=1
Expand Down Expand Up @@ -232,9 +242,11 @@ jobs:
# Install CBLAS
# yum install -y atlas-devel blas-devel
chmod -R a+rwx /host/${{ env.ccache_dir }}
lscpu
ccache -s # Print current cache stats
ccache -z # Zero cache entry
Expand Down
7 changes: 3 additions & 4 deletions 3rd-party/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,9 @@ else(USE_BUILTIN_SENTENCEPIECE)
# surface, it becomes a problem. See:
# https://github.com/google/sentencepiece/issues/922
add_library(sentencepiece_pb INTERFACE)
target_include_directories(sentencepiece_pb
INTERFACE sentencepiece/third_party/protobuf-lite)
target_include_directories(sentencepiece_pb
INTERFACE sentencepiece/src/builtin_pb)
target_include_directories(
sentencepiece_pb INTERFACE sentencepiece/third_party/protobuf-lite
sentencepiece/src/builtin_pb)

add_library(SentencePiece::SentencePiece ALIAS sentencepiece)
add_library(SentencePiece::Protobuf ALIAS sentencepiece_pb)
Expand Down
81 changes: 45 additions & 36 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ set(CMAKE_VISIBILITY_INLINE_HIDDEN YES)
option(WITH_ASAN "Enable AddressSanitizer in builds" OFF)
option(WITH_TESTS "Enable building tests (requires a few extra libraries)" OFF)

option(WITH_INTGEMM "Use intgemm" ON)
option(WITH_INTGEMM "Use intgemm" OFF)
option(WITH_RUY "Use ruy" OFF)
option(WITH_GEMMOLOGY "Use gemmology" OFF)
option(WITH_GEMMOLOGY "Use gemmology" ON)
option(WITH_BLAS "Use BLAS. Otherwise moves to ruy" ON)

option(USE_BUILTIN_SENTENCEPIECE "Use SentencePiece supplied as 3rd-party" ON)
Expand All @@ -26,7 +26,7 @@ option(USE_SSSE3 "Use SSSE3" OFF)
option(USE_SSE2 "Use SSE2" OFF)
option(USE_NEON "Use NEON" OFF)

option(SLIMT_SIMD "Use SLIMT SIMD" OFF)
option(SLIMT_PACKAGE "Package for cmake, pkgconfig" OFF)

set(SLIMT_COMPILE_OPTIONS
-Wall
Expand All @@ -51,30 +51,41 @@ if(WITH_ASAN)
list(APPEND SLIMT_LINK_OPTIONS -fsanitize=address -fsanitize=undefined)
endif(WITH_ASAN)

find_package(PCRE2 REQUIRED)
if(WITH_TCMALLOC)
find_library(
TCMALLOC_LIBRARIES
NAMES tcmalloc_minimal
PATHS /usr/lib /usr/local/lib)

if(USE_BUILTIN_SENTENCEPIECE)
list(APPEND SLIMT_PUBLIC_LIBS SentencePiece::SentencePiece)
else(USE_BUILTIN_SENTENCEPIECE)
list(APPEND SLIMT_PRIVATE_LIBS SentencePiece::SentencePiece)
list(APPEND SLIMT_PRIVATE_LIBS SentencePiece::Protobuf)
endif(USE_BUILTIN_SENTENCEPIECE)
if(TCMALLOC_LIBRARIES)
message(STATUS "tcmalloc found: ${TCMALLOC_LIBRARIES}")
else(TCMALLOC_LIBRARIES)
message(FATAL_ERROR "tcmalloc not found")
endif(TCMALLOC_LIBRARIES)

list(APPEND SLIMT_PRIVATE_LIBS PCRE2::PCRE2)
list(APPEND SLIMT_PRIVATE_LIBS ${TCMALLOC_LIBRARIES})
endif(WITH_TCMALLOC)

find_package(PCRE2 REQUIRED)
add_subdirectory(3rd-party)

list(APPEND SLIMT_PRIVATE_LIBS PCRE2::PCRE2)
list(APPEND SLIMT_PUBLIC_LIBS SentencePiece::SentencePiece)

if(NOT USE_BUILTIN_SENTENCEPIECE)
list(APPEND SLIMT_PRIVATE_LIBS SentencePiece::Protobuf)
endif(NOT USE_BUILTIN_SENTENCEPIECE)

if(WITH_BLAS)
find_package(BLAS REQUIRED)
find_path(CBLAS_INCLUDE_DIR cblas.h)

# The above defines BLAS::BLAS only on recent CMake versions; the following code
# detects whether the target is available and, if not, creates a stand-in target.
if(BLAS_FOUND AND NOT TARGET BLAS::BLAS)
add_library(BLAS::BLAS INTERFACE IMPORTED)
set_target_properties(
BLAS::BLAS
PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
"${BLAS_INCLUDE_DIR} ${CBLAS_INCLUDE_DIR}"
INTERFACE_LINK_LIBRARIES "${BLAS_LIBRARIES}")
BLAS::BLAS PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${BLAS_INCLUDE_DIR} "
INTERFACE_LINK_LIBRARIES "${BLAS_LIBRARIES}")
endif()

list(APPEND SLIMT_PRIVATE_LIBS BLAS::BLAS)
Expand Down Expand Up @@ -164,26 +175,24 @@ message(STATUS "Project version: ${PROJECT_VERSION_STRING_FULL}")

if(UNIX)
include(GNUInstallDirs)
if(BUILD_CPACK)
# cmake-format: off
set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON)
set(CPACK_PACKAGE_CONTACT "Jerin Philip <[email protected]>")
set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Jerin Philip")
set(CPACK_PACKAGE_DIRECTORY ${CMAKE_BINARY_DIR}/deb)
set(CPACK_GENERATOR "DEB")
set(CPACK_DEBIAN_PACKAGE_DEPENDS libsentencepiece-dev)
set(CPACK_PACKAGE_NAME ${PROJECT_NAME})
set(CPACK_PACKAGE_FILE_NAME ${PROJECT_NAME})
set(CPACK_PACKAGE_VERSION ${PROJECT_VERSION_STRING})
set(CPACK_DEBIAN_PACKAGE_NAME ${PROJECT_NAME})
set(CPACK_DEBIAN_PACKAGE_VERSION "${PROJECT_VERSION_STRING}")
set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
set(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})
set(CPACK_DEBIAN_PACKAGE_DESCRIPTION "slimt: Inference library for tiny translation models.")
# cmake-format: on
include(CPack)
endif(BUILD_CPACK)
# cmake-format: off
set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON)
set(CPACK_PACKAGE_CONTACT "Jerin Philip <[email protected]>")
set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Jerin Philip")
set(CPACK_PACKAGE_DIRECTORY ${CMAKE_BINARY_DIR}/deb)
set(CPACK_GENERATOR "DEB")
set(CPACK_DEBIAN_PACKAGE_DEPENDS libsentencepiece-dev)
set(CPACK_PACKAGE_NAME ${PROJECT_NAME})
set(CPACK_PACKAGE_FILE_NAME ${PROJECT_NAME})
set(CPACK_PACKAGE_VERSION ${PROJECT_VERSION_STRING})
set(CPACK_DEBIAN_PACKAGE_NAME ${PROJECT_NAME})
set(CPACK_DEBIAN_PACKAGE_VERSION "${PROJECT_VERSION_STRING}")
set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
set(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})
set(CPACK_DEBIAN_PACKAGE_DESCRIPTION "slimt: Inference library for tiny translation models.")
# cmake-format: on
include(CPack)
endif(UNIX)

add_subdirectory(slimt)
Expand Down
89 changes: 77 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,19 +49,24 @@ More information on the models are described in the following papers:
The large list of dependencies from bergamot-translator has currently been
reduced to:

* For `int8_t` matrix-multiply [intgemm](https://github.com/kpu/intgemm) (`x86_64`) or
[ruy](https://github.com/google/ruy) (`aarch64`).
* For `int8_t` matrix-multiply [intgemm](https://github.com/kpu/intgemm)
(`x86_64`) or [ruy](https://github.com/google/ruy) (`aarch64`) or
[xsimd](https://github.com/xtensor-stack/xsimd) via
[gemmology](https://github.com/mozilla/gemmology).
* For vocabulary - [sentencepiece](https://github.com/browsermt/sentencepiece).
* For `sgemm` - Whatever BLAS provider is found via CMake.
* CLI11 (only a dependency for cmdline)
* For sentence-splitting using regular-expressions
[PCRE2](https://github.com/PCRE2Project/pcre2).
* For `sgemm` - Whatever BLAS provider is found via CMake (openblas,
intel-oneapimkl, cblas). Feel free to provide
[hints](https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors).
* [CLI11](https://github.com/CLIUtils/CLI11/) (only a dependency for cmdline)

Source code is made public where basic functionality (text-translation) works
for English-German tiny models. Parity in features and speed with marian and
bergamot-translator (where relevant) is a work-in-progress. Eventual support for
`base` models is planned. Contributions are welcome and appreciated.



## Getting started

Clone with submodules.
Expand All @@ -70,32 +75,86 @@ Clone with submodules.
git clone --recursive https://github.com/jerinphilip/slimt.git
```

Configure and build.
Configure and build. `slimt` is still experimenting with CMake and
dependencies. The following should work at the moment:


```bash
# Configure
cmake -B build -S $PWD -DCMAKE_BUILD_TYPE=Release

# Configure intgemm
cmake -B build -S $PWD -DCMAKE_BUILD_TYPE=Release -DWITH_INTGEMM=ON
# Configure ruy instead of intgemm
cmake -B build -S $PWD -DCMAKE_BUILD_TYPE=Release -DWITH_RUY=ON

# Build
cmake --build build --target all --parallel 4
```

Successful build generate two executables `slimt` and `slimt_test` for
A successful build generates two executables, `slimt-cli` and `slimt-test`, for
command-line usage and testing respectively.

```bash
build/bin/slimt \
build/bin/slimt-cli \
--root <path/to/folder> \
--model </relative/path/to/model> \
--vocabulary </relative/path/to/vocab> \
--shortlist </relative/path/to/shortlist>

build/slimt_test <test-name>
build/slimt-test <test-name>
```

### Distribution

There is a build-path being prepared towards packaging on Linux. To use this,
configure with the following args:

```bash
# Configure to use xsimd via gemmology
ARGS=(
# Use gemmology
-DWITH_GEMMOLOGY=ON

# -DUSE_AVX512 -DUSE_SSSE3 ... -DUSE_NEON also available.
-DUSE_AVX2=ON

# Use sentencepiece installed via system.
-DUSE_BUILTIN_SENTENCEPIECE=OFF

# Exports slimtConfig.cmake (cmake) and slimt.pc.in (pkg-config)
-DSLIMT_PACKAGE=ON

-DCMAKE_INSTALL_PREFIX=/path/to/prefix
)

cmake -B build -S $PWD -DCMAKE_BUILD_TYPE=Release "${ARGS[@]}"
cmake --build build --target all

# May require elevated privileges (e.g. sudo) if the prefix is writable only by root.
cmake --build build --target install
```

The above run expects the packages `sentencepiece`, `xsimd` and a BLAS provider
to come from the system's package manager. Examples of this in distributions
include:

```bash
# Debian based systems
sudo apt-get install -y libxsimd-dev libsentencepiece-dev libopenblas-dev

# ArchLinux
pacman -S openblas xsimd
yay -S sentencepiece-git
```

This is still very much a work in progress, towards being able to make
[lemonade](https://github.com/jerinphilip/lemonade) available in distributions.
Help is much appreciated; please get in touch if you can contribute.

### Python

Python bindings to the C++ code are available.
Python bindings to the C++ code are available. Python bindings provide a layer
to download models and use them via the command-line entrypoint `slimt` (the
core slimt library only has the inference code).

```bash
python3 -m venv env
Expand All @@ -108,3 +167,9 @@ python3 -m pip install dist/<wheel-name>.whl
slimt download -m en-de-tiny
slimt download -m de-en-tiny
```

You may pass custom CMake variables via the `CMAKE_ARGS` environment variable.

```bash
CMAKE_ARGS='-D...' python3 setup.py bdist_wheel
```
8 changes: 0 additions & 8 deletions cmake/slimtConfig.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,3 @@ pkg_check_modules(Sentencepiece REQUIRED sentencepiece)

# Exports slimt::slimt-shared target.
include("${CMAKE_CURRENT_LIST_DIR}/slimtTargets.cmake")

# Sentencepiece has to be picked up by CMake, as it is not exported by
# slimtTargets.cmake.
add_library(slimt_export INTERFACE)
target_link_libraries(slimt_export INTERFACE slimt::slimt-shared
${Sentencepiece_LIBRARIES})
target_include_directories(slimt_export INTERFACE ${Sentencepiece_INCLUDE_DIRS})
add_library(slimt::slimt ALIAS slimt_export)
2 changes: 2 additions & 0 deletions scripts/ci/android/02-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ function cmake-configure {
SLIMT_ARGS=(
-DWITH_RUY=ON
-DWITH_INTGEMM=OFF
-DWITH_GEMMOLOGY=OFF
-DUSE_BUILTIN_SENTENCEPIECE=ON
-DWITH_BLAS=OFF
-DSLIMT_USE_INTERNAL_PCRE2=ON
)
Expand Down
1 change: 0 additions & 1 deletion scripts/ci/format-check.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ function slimt-check-clang-tidy {

# Gemmology, which is on by default, has to be turned off.
-DWITH_GEMMOLOGY=OFF
-DEXPORT_CMAKE_FILE=OFF
)

cmake -B build -S . "${ARGS[@]}"
Expand Down
13 changes: 13 additions & 0 deletions scripts/ci/macos/01-setup.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
#!/bin/bash
# CI setup for the macOS job: installs the build dependencies with Homebrew,
# prints the CPU feature flags, and points kernel core dumps at a
# slimt-coredump directory under the current working directory.

# Build tooling and libraries.
brew install cmake
brew install xsimd openblas
brew install sentencepiece

# Log the CPU feature flags reported by sysctl.
sysctl -a | grep machdep.cpu.features

ulimit -c unlimited # Enable core dumps to be captured (must be in same run block)

# Create the directory that will receive core files.
COREDUMP_DIR="$PWD/slimt-coredump"
mkdir -p "${COREDUMP_DIR}"
COREDUMP_PATTERN="${COREDUMP_DIR}/core.%n.%P.%t"
# Quote the whole name=value pair so that a working directory containing
# whitespace reaches sysctl as a single argument instead of being word-split.
sudo sysctl -w "kern.corefile=${COREDUMP_PATTERN}"

echo "coredumps: ${COREDUMP_PATTERN}"
13 changes: 12 additions & 1 deletion scripts/ci/macos/02-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,18 @@
set -eo pipefail

# Configure
cmake -B build -S $PWD -DCMAKE_BUILD_TYPE=Release -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DWITH_INTGEMM=ON -DWITH_RUY=OFF
ARGS=(
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
-DWITH_INTGEMM=OFF -DWITH_RUY=OFF
-DWITH_GEMMOLOGY=ON
-DUSE_SSE2=ON
-DUSE_BUILTIN_SENTENCEPIECE=OFF

-DCMAKE_BUILD_TYPE=Debug
-DWITH_ASAN=ON
)

cmake -B build -S $PWD "${ARGS[@]}"

# Build
cmake --build build --target all
Loading

0 comments on commit 4322e41

Please sign in to comment.