Skip to content

Commit

Permalink
Make way for aarch64
Browse files Browse the repository at this point in the history
Enable the following paths.

1. google/ruy for `aarch64` int8*int8 matmul (affine) - Mac M1, Android
Phones.
2. google/ruy for android `aarch64` float32*float32 matmul (attention).
3. Apple Accelerate for Mac M1 float32*float32 matmul (attention)

Effectively, this supports Mac (x86_64 Intel, `aarch64` M1) and Android
(untested, `aarch64`) now. There are obvious optimizations pending on
some fronts in the newly created branches.

PR: #2
  • Loading branch information
jerinphilip authored Aug 14, 2023
1 parent 82ebf9a commit 774fbc7
Show file tree
Hide file tree
Showing 34 changed files with 1,362 additions and 613 deletions.
16 changes: 11 additions & 5 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,15 @@ jobs:
build-test:
name: "build-test"
strategy:
fail-fast: false
matrix:
os: ["ubuntu-latest"]
include:
- name: "ubuntu"
os: "ubuntu-latest"
- name: "macos"
os: "macos-latest"
- name: "android"
os: "ubuntu-latest"

runs-on: ${{ matrix.os }}

Expand All @@ -48,13 +55,12 @@ jobs:

- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y build-essential cmake
bash scripts/ci/${{ matrix.name }}/01-setup.sh
- name: Build
run:
bash scripts/ci/build.sh
bash scripts/ci/${{ matrix.name }}/02-build.sh

- name: Test
run:
bash scripts/ci/test.sh
bash scripts/ci/${{ matrix.name }}/03-test.sh
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
[submodule "3rd-party/intgemm"]
path = 3rd-party/intgemm
url = https://github.com/kpu/intgemm
[submodule "3rd-party/ruy"]
path = 3rd-party/ruy
url = https://github.com/google/ruy
3 changes: 3 additions & 0 deletions 3rd-party/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ add_library(Sentencepiece::Sentencepiece ALIAS spiece_iface)

# intgemm
add_subdirectory(intgemm EXCLUDE_FROM_ALL)

# ruy
add_subdirectory(ruy EXCLUDE_FROM_ALL)
1 change: 1 addition & 0 deletions 3rd-party/ruy
Submodule ruy added at c04e5e
45 changes: 41 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,48 @@ option(WITH_ASAN "Enable AddressSanitizer in builds" OFF)
option(WITH_TESTS "Enable building tests (requires a few extra libraries)" OFF)

add_subdirectory(3rd-party)
find_package(BLAS REQUIRED)

set(SLIMT_EXTRA_COMPILE_OPTIONS -Wall -Wpedantic -Wextra -Wno-unknown-pragmas
-Werror -march=native)
set(SLIMT_EXTERNAL_LIBS Sentencepiece::Sentencepiece intgemm BLAS::BLAS)
set(SLIMT_COMPILE_OPTIONS -Wall -Wpedantic -Wextra -Wno-unknown-pragmas -Werror)
set(SLIMT_COMPILE_DEFINITIONS "")

# Backend selection switches. Each enabled backend contributes link libraries
# and a HAS_* compile definition in the conditional sections that follow.
option(WITH_INTGEMM "Use intgemm" ON)
option(WITH_RUY "Use ruy" OFF)
option(WITH_BLAS "Use BLAS. Otherwise moves to ruy" ON)

# Libraries linked in every configuration; backends are appended conditionally.
set(SLIMT_EXTERNAL_LIBS Sentencepiece::Sentencepiece)

# Optional BLAS backend. When enabled, BLAS::BLAS is appended to the external
# link libraries and HAS_BLAS to the compile definitions. On Apple platforms
# the Accelerate framework and its vecLib headers are wired in as well.
if(WITH_BLAS)
  # REQUIRED: BLAS::BLAS is linked unconditionally below, so a missing BLAS
  # should stop configuration here with a clear message rather than surface
  # later as an unknown-target error.
  find_package(BLAS REQUIRED)
  list(APPEND SLIMT_EXTERNAL_LIBS BLAS::BLAS)
  list(APPEND SLIMT_COMPILE_DEFINITIONS HAS_BLAS)

  if(APPLE)
    # NOTE(review): FindBLAS reads BLA_VENDOR (not BLAS_VENDOR), and only
    # before find_package() runs, so this assignment does not influence which
    # BLAS is detected. Kept as-is in case other parts of the build read it.
    set(BLAS_VENDOR "Accelerate")

    # see https://developer.apple.com/documentation/accelerate for more info you
    # may need to install Xcode command line tools if you don't have them
    # already (https://developer.apple.com/xcode/features/)
    add_library(apple_accelerate INTERFACE)
    target_link_libraries(apple_accelerate INTERFACE "-framework Accelerate")

    # Prefer the SDK CMake was configured with when it is an actual directory
    # (e.g. a full Xcode install); otherwise fall back to the Command Line
    # Tools SDK location.
    if(IS_DIRECTORY "${CMAKE_OSX_SYSROOT}")
      set(SLIMT_APPLE_SDK_ROOT "${CMAKE_OSX_SYSROOT}")
    else()
      set(SLIMT_APPLE_SDK_ROOT
          "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk")
    endif()

    # https://stackoverflow.com/a/58304566
    target_include_directories(
      apple_accelerate
      INTERFACE
        "${SLIMT_APPLE_SDK_ROOT}/System/Library/Frameworks/Accelerate.framework/Frameworks/vecLib.framework/Headers"
    )
    add_library(AppleLibs::accelerate ALIAS apple_accelerate)
    list(APPEND SLIMT_EXTERNAL_LIBS AppleLibs::accelerate)
  endif()
endif()

# intgemm backend: link the library, expose HAS_INTGEMM to the sources and add
# -march=native to the compile options.
if(WITH_INTGEMM)
  list(APPEND SLIMT_EXTERNAL_LIBS intgemm)
  list(APPEND SLIMT_COMPILE_DEFINITIONS HAS_INTGEMM)
  list(APPEND SLIMT_COMPILE_OPTIONS -march=native)
endif()

# ruy backend: expose HAS_RUY to the sources and link the ruy library.
if(WITH_RUY)
  list(APPEND SLIMT_COMPILE_DEFINITIONS HAS_RUY)
  list(APPEND SLIMT_EXTERNAL_LIBS ruy)
endif()

# cmake-format: off
set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE} -pg" CACHE STRING "Flags used by the C++ compiler during profile builds." FORCE)
Expand Down
12 changes: 9 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ library with focus on machine translation, all the bells and whistles that come
with it are not necessary to run inference on client-machines (e.g: autograd,
multiple sequence-to-sequence architecture support, beam-search). For some use
cases like an input-method engine doing translation (see
[lemonade](https://github.com/jerinphilip/lemonade)). Single-thread operation
[lemonade](https://github.com/jerinphilip/lemonade)) - single-thread operation
coexisting with other processes on the system suffices. This is the
motivation for this transplant repository. There's not much novel here except
ease of use. This repository is simply the _tiny11_ part of marian.
Expand All @@ -30,10 +30,9 @@ The large list of dependencies from bergamot-translator has currently been
reduced to:

* For `int8_t` matrix-multiply [intgemm](https://github.com/kpu/intgemm) (`x86_64`) or
[ruy](https://github.com/google/ruy) (`aarch64`, planned).
[ruy](https://github.com/google/ruy) (`aarch64`).
* For vocabulary - [sentencepiece](https://github.com/browsermt/sentencepiece).
* For `sgemm` - Whatever BLAS provider is found via CMake.
* OpenMP is used in `layer_norm`, and is pending removal.
* CLI11 (only a dependency for cmdline)

Source code is made public where basic functionality (text-translation) works.
Expand All @@ -42,6 +41,13 @@ relevant) is a work-in-progress. Contributions are welcome and appreciated.

## Getting started

Clone with submodules.

```bash
git clone --recursive https://github.com/jerinphilip/slimt.git
```

Configure and build.

```bash
# Configure
Expand Down
3 changes: 2 additions & 1 deletion app/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ void run(const Options &options) {
};

// Tokenize into numeric-ids using sentencepiece.
size_t max_sequence_length = 0;
Vocabulary vocab(mmap.vocab.data(), mmap.vocab.size());
ShortlistGenerator shortlist_generator( //
mmap.shortlist.data(), mmap.shortlist.size(), //
Expand Down Expand Up @@ -95,6 +94,7 @@ void run(const Options &options) {
};

std::string line;
size_t max_sequence_length = 0;
size_t token_count = 0;
size_t line_no = 0;
Sentences sentences;
Expand All @@ -114,6 +114,7 @@ void run(const Options &options) {
if (token_count > options.max_tokens_per_batch) {
batch_and_translate(sentences, max_sequence_length);
sentences.clear();
max_sequence_length = 0;
}
sentences.push_back(std::move(words));
max_sequence_length = candidate_max_sequence_length;
Expand Down
20 changes: 14 additions & 6 deletions app/test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include <iostream>
#include <unordered_map>

#include "3rd-party/intgemm/intgemm/intgemm.h"
#include "slimt/slimt.hh"

namespace slimt {
Expand Down Expand Up @@ -267,6 +266,11 @@ void LayerNormalizationOp() {
SLIMT_TRACE(lhs_expected);
SLIMT_CHECK(lhs == lhs_expected);
}
} // namespace slimt

#ifdef HAS_INTGEMM
#include "3rd-party/intgemm/intgemm/intgemm.h"
namespace slimt {

void AffineIntgemm() {
// clang-format off
Expand Down Expand Up @@ -609,8 +613,8 @@ void AffineIntgemm() {

// Compute from the intgemm_affine function, used in the library.
// This ensures what we checked in there is consistent with what we expect.
Tensor y_whole = intgemm_affine(actual.A, actual.B, actual.bias, quant.a,
quant.b, "y_whole");
Tensor y_whole = qmm::affine(actual.A, actual.B, actual.bias, quant.a,
quant.b, "y_whole");
SLIMT_TRACE(y_whole.shape());
SLIMT_TRACE(y_expected.shape());
SLIMT_TRACE(mse(y_whole, y_expected));
Expand All @@ -626,7 +630,10 @@ void AffineIntgemm() {

// SLIMT_TRACE2(y_whole, y_expected);
}
} // namespace slimt
#endif

namespace slimt {
template <class Field>
struct Record {
Field model;
Expand Down Expand Up @@ -759,8 +766,10 @@ int main(int argc, char **argv) {
TEST_ENTRY(DotBatchedNodeOp), //
TEST_ENTRY(TransposeNodeOp), //
TEST_ENTRY(LayerNormalizationOp), //
TEST_ENTRY(AffineIntgemm), //
TEST_ENTRY(ShortlistGen) //
#ifdef HAS_INTGEMM
TEST_ENTRY(AffineIntgemm), //
#endif
TEST_ENTRY(ShortlistGen) //
});

// std::cout << "slimt test\n";
Expand Down Expand Up @@ -796,6 +805,5 @@ int main(int argc, char **argv) {
std::cerr << "Unknown test " << test << "\n";
std::exit(EXIT_FAILURE);
}

return 0;
}
5 changes: 5 additions & 0 deletions scripts/ci/android/01-setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# CI setup for the Android build: installs host build tools and unpacks the
# Android NDK (r23b) into the current directory.

# Abort on the first failing command so a broken install or download is not
# silently ignored by the remaining steps.
set -eo pipefail

NDK_ARCHIVE="android-ndk-r23b-linux.zip"

# Refresh the package index before installing; a stale index can make the
# install step fail.
sudo apt-get update
sudo apt-get -y install ccache cmake

# -c resumes a partially downloaded archive if one is already present.
wget -c --quiet "https://dl.google.com/android/repository/${NDK_ARCHIVE}"
unzip -qq "${NDK_ARCHIVE}"
41 changes: 41 additions & 0 deletions scripts/ci/android/02-build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/bin/bash
# Cross-compiles the project for Android (arm64-v8a) using the NDK directory
# android-ndk-r23b located in the current working directory.
# Assumes it is started from the repository root: the source directory is
# passed to CMake as `..` from inside ./build.

# Stop at the first failure so the build step never runs after a failed
# configure.
set -eo pipefail

# Configures an out-of-source build in ./build with the NDK's CMake toolchain
# file, enabling ruy and disabling the intgemm and BLAS backends.
function cmake-configure {
  # Resolve the NDK to an absolute path before changing directory below.
  local NDK="${PWD}/android-ndk-r23b"
  local ABI="arm64-v8a"
  local MINSDK_VERSION=28
  local ANDROID_PLATFORM="android-${MINSDK_VERSION}"

  mkdir -p build
  pushd build

  local SLIMT_ARGS=(
    -DWITH_RUY=ON
    -DWITH_INTGEMM=OFF
    -DWITH_BLAS=OFF
  )

  local OTHER_ANDROID_ARGS=(
    -DANDROID_ARM_NEON=TRUE
  )

  # NOTE(review): ANDROID_TOOLCHAIN_NAME below names a 32-bit ARM GCC toolchain
  # while the ABI is arm64-v8a built with clang; it is presumably ignored by
  # this NDK release - confirm and drop if so.

  # Additionally list variables finally configured.
  set -x
  cmake -L \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_TOOLCHAIN_FILE="${NDK}/build/cmake/android.toolchain.cmake" \
    -DANDROID_TOOLCHAIN=clang \
    -DANDROID_ABI="${ABI}" \
    -DANDROID_PLATFORM="${ANDROID_PLATFORM}" \
    -DANDROID_NATIVE_API_LEVEL="${MINSDK_VERSION}" \
    -DANDROID_TOOLCHAIN_NAME=arm-linux-androideabi-4.8 \
    -DANDROID_STL=c++_static \
    -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache \
    "${SLIMT_ARGS[@]}" \
    "${OTHER_ANDROID_ARGS[@]}" \
    ..
  set +x
  popd
}

cmake-configure
cmake --build build --target all
3 changes: 3 additions & 0 deletions scripts/ci/android/03-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

# Placeholder test step: the Android build is cross-compiled, so this script
# only reports that no tests are run.
printf '%s\n' "Android is cross-compiled, no tests for now."
9 changes: 0 additions & 9 deletions scripts/ci/build.sh

This file was deleted.

3 changes: 3 additions & 0 deletions scripts/ci/macos/01-setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

# CI setup step for macOS: install CMake through Homebrew.
brew install cmake
9 changes: 9 additions & 0 deletions scripts/ci/macos/02-build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
# Configures and builds the project in ./build (Release), with the intgemm
# backend enabled and ruy disabled.

set -eo pipefail

# Configure. The source directory is quoted so that a checkout path containing
# spaces reaches CMake as a single argument.
cmake -B build -S "$PWD" \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
  -DWITH_INTGEMM=ON \
  -DWITH_RUY=OFF

# Build
cmake --build build --target all
16 changes: 16 additions & 0 deletions scripts/ci/macos/03-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
# Smoke test for the macOS build: fetches the en-de-tiny model with the
# `bergamot` CLI and feeds data/sample.txt to the freshly built slimt binary.

# Fail as soon as the install, download or translation step fails.
set -eo pipefail

# Install `bergamot` CLI via pip.
python3 -m pip install bergamot -f https://github.com/jerinphilip/bergamot-translator/releases/expanded_assets/latest

# Download the en-de-tiny model.
bergamot download -m en-de-tiny

# No trailing slash here: PREFIX adds its own path separator.
BROWSERMT="$HOME/Library/Application Support/bergamot/models/browsermt"
PREFIX="$BROWSERMT/ende.student.tiny11"

MODEL=model.intgemm.alphas.bin
VOCAB=vocab.deen.spm
SHORTLIST=lex.s2t.bin

./build/bin/slimt --root "${PREFIX}" \
  --model "${MODEL}" --vocab "${VOCAB}" --shortlist "${SHORTLIST}" \
  < data/sample.txt
4 changes: 4 additions & 0 deletions scripts/ci/ubuntu/01-setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# CI setup step for Ubuntu: refresh the package index and install the compiler
# toolchain and CMake.

# Abort on the first failing command so a failed index refresh or install is
# reported instead of being masked by a later command.
set -eo pipefail

sudo apt-get update
sudo apt-get install -y build-essential cmake
9 changes: 9 additions & 0 deletions scripts/ci/ubuntu/02-build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
# Configures and builds the project in ./build (Release), with the intgemm
# backend enabled and ruy disabled.

set -eo pipefail

# Configure. The source directory is quoted so that a checkout path containing
# spaces reaches CMake as a single argument.
cmake -B build -S "$PWD" \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
  -DWITH_INTGEMM=ON \
  -DWITH_RUY=OFF

# Build
cmake --build build --target all
File renamed without changes.
4 changes: 3 additions & 1 deletion slimt/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ add_library(
Tensor.cc
Model.cc
TensorOps.cc
QMM.cc
Batch.cc
Utils.cc
Shortlist.cc)

target_compile_options(slimt PRIVATE ${SLIMT_EXTRA_COMPILE_OPTIONS})
target_compile_options(slimt PRIVATE ${SLIMT_COMPILE_OPTIONS})
target_compile_definitions(slimt PRIVATE ${SLIMT_COMPILE_DEFINITIONS})
target_link_libraries(slimt PUBLIC ${SLIMT_EXTERNAL_LIBS})

if(WITH_ASAN)
Expand Down
Loading

0 comments on commit 774fbc7

Please sign in to comment.