Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Java API for spoken language identification with whisper multilingual models #817

Merged
merged 3 commits into from
Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/android.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ jobs:
./build-android-arm64-v8a.sh
mkdir -p jniLibs/arm64-v8a/
cp -v ./build-android-arm64-v8a/install/lib/*.so ./jniLibs/arm64-v8a/
rm -rf ./build-android-arm64-v8a/

- name: build android armv7-eabi
shell: bash
Expand All @@ -65,6 +66,7 @@ jobs:
./build-android-armv7-eabi.sh
mkdir -p ./jniLibs/armeabi-v7a/
cp -v ./build-android-armv7-eabi/install/lib/*.so ./jniLibs/armeabi-v7a/
rm -rf ./build-android-armv7-eabi

- name: build android x86_64
shell: bash
Expand All @@ -73,6 +75,7 @@ jobs:
./build-android-x86-64.sh
mkdir -p ./jniLibs/x86_64
cp -v ./build-android-x86-64/install/lib/*.so ./jniLibs/x86_64
rm -rf ./build-android-x86-64

- name: build android x86
shell: bash
Expand All @@ -81,6 +84,7 @@ jobs:
./build-android-x86.sh
mkdir -p ./jniLibs/x86
cp -v ./build-android-x86/install/lib/*.so ./jniLibs/x86
rm -rf ./build-android-x86

- name: Copy files
shell: bash
Expand Down Expand Up @@ -112,6 +116,8 @@ jobs:
command: |
git config --global user.email "[email protected]"
git config --global user.name "Fangjun Kuang"
du -h -d1 .
ls -lh

rm -rf huggingface
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
Expand Down
95 changes: 88 additions & 7 deletions .github/workflows/apk-kws.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,23 @@ jobs:
echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
ls -lh ${ANDROID_NDK_LATEST_HOME}

- name: Setup build tool version variable
shell: bash
run: |
echo "---"
ls -lh /usr/local/lib/android/
echo "---"

ls -lh /usr/local/lib/android/sdk
echo "---"

ls -lh /usr/local/lib/android/sdk/build-tools
echo "---"

BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
echo "Last build tool version is: $BUILD_TOOL_VERSION"

- name: build APK
shell: bash
run: |
Expand All @@ -59,13 +76,77 @@ jobs:
run: |
ls -lh ./apks/

- uses: actions/upload-artifact@v4

# https://github.com/marketplace/actions/sign-android-release
- uses: r0adkll/sign-android-release@v1
name: Sign app APK
with:
path: ./apks/*.apk
releaseDirectory: ./apks
signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
env:
BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}

- name: Release APK
uses: svenstaro/upload-release-action@v2
- name: Display APK after signing
shell: bash
run: |
ls -lh ./apks/
du -h -d1 .

- name: Rename APK after signing
shell: bash
run: |
cd apks
rm -fv signingKey.jks
rm -fv *.apk.idsig
rm -fv *-aligned.apk

all_apks=$(ls -1 *-signed.apk)
echo "----"
echo $all_apks
echo "----"
for apk in ${all_apks[@]}; do
n=$(echo $apk | sed -e s/-signed//)
mv -v $apk $n
done

cd ..

ls -lh ./apks/
du -h -d1 .

- name: Display APK after rename
shell: bash
run: |
ls -lh ./apks/
du -h -d1 .

- name: Publish to huggingface
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3
with:
file_glob: true
file: apks/*.apk
overwrite: true
max_attempts: 20
timeout_seconds: 200
shell: bash
command: |
git config --global user.email "[email protected]"
git config --global user.name "Fangjun Kuang"

rm -rf huggingface
export GIT_LFS_SKIP_SMUDGE=1

git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
git merge -m "merge remote" --ff origin main

mkdir -p kws
cp -v ../apks/*.apk ./kws/
git status
git lfs track "*.apk"
git add .
git commit -m "add more apks"
git push https://csukuangfj:[email protected]/csukuangfj/sherpa-onnx-apk main
8 changes: 8 additions & 0 deletions .github/workflows/run-java-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,14 @@ jobs:
make -j4
ls -lh lib

- name: Run java test (Spoken language identification)
shell: bash
run: |
cd ./java-api-examples
./run-spoken-language-identification-whisper.sh
# Delete model files to save space
rm -rf sherpa-onnx-whisper-*

- name: Run java test (Streaming ASR)
shell: bash
run: |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ class MainActivity : AppCompatActivity() {
val config = OnlineRecognizerConfig(
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
modelConfig = getModelConfig(type = type)!!,
lmConfig = getOnlineLMConfig(type = type),
// lmConfig = getOnlineLMConfig(type = type),
endpointConfig = getEndpointConfig(),
enableEndpoint = true,
)
Expand Down
3 changes: 1 addition & 2 deletions java-api-examples/.gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
lib
hs_err*
!run-streaming*.sh
!run-non-streaming*.sh
!run-*.sh
6 changes: 6 additions & 0 deletions java-api-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,9 @@ This directory contains examples for the JAVA API of sherpa-onnx.
./run-non-streaming-tts-coqui-de.sh
./run-non-streaming-tts-vits-zh.sh
```

## Spoken language identification

```bash
./run-spoken-language-identification-whisper.sh
```
61 changes: 61 additions & 0 deletions java-api-examples/SpokenLanguageIdentificationWhisper.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a multilingual whisper model for
// spoken language identification.
//
// Note that it needs a multilingual whisper model. For instance,
// tiny works, but tiny.en doesn't.
import com.k2fsa.sherpa.onnx.*;

public class SpokenLanguageIdentificationWhisper {
public static void main(String[] args) {
// please download model and test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
String encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx";
String decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx";

String[] testFiles =
new String[] {
"./spoken-language-identification-test-wavs/en-english.wav",
"./spoken-language-identification-test-wavs/de-german.wav",
"./spoken-language-identification-test-wavs/zh-chinese.wav",
"./spoken-language-identification-test-wavs/es-spanish.wav",
"./spoken-language-identification-test-wavs/fa-persian.wav",
"./spoken-language-identification-test-wavs/ko-korean.wav",
"./spoken-language-identification-test-wavs/ja-japanese.wav",
"./spoken-language-identification-test-wavs/ru-russian.wav",
"./spoken-language-identification-test-wavs/uk-ukrainian.wav",
};

SpokenLanguageIdentificationWhisperConfig whisper =
SpokenLanguageIdentificationWhisperConfig.builder()
.setEncoder(encoder)
.setDecoder(decoder)
.build();

SpokenLanguageIdentificationConfig config =
SpokenLanguageIdentificationConfig.builder()
.setWhisper(whisper)
.setNumThreads(1)
.setDebug(true)
.build();

SpokenLanguageIdentification slid = new SpokenLanguageIdentification(config);
for (String filename : testFiles) {
WaveReader reader = new WaveReader(filename);

OfflineStream stream = slid.createStream();
stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

String lang = slid.compute(stream);
System.out.println("---");
System.out.printf("filename: %s\n", filename);
System.out.printf("lang: %s\n", lang);

stream.release();
}
System.out.println("---");

slid.release();
}
}
59 changes: 59 additions & 0 deletions java-api-examples/run-spoken-language-identification-whisper.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env bash
#
# Builds whatever is missing (the JNI shared library and sherpa-onnx.jar),
# downloads the whisper tiny model and the test wave files if they are not
# present, and then runs SpokenLanguageIdentificationWhisper.java.
#
# Run it from the java-api-examples directory: all paths below are relative.

set -ex

# Build the JNI shared library only when neither the macOS (.dylib) nor the
# Linux (.so) flavour exists yet. The build happens inside ../build so that
# no build artifacts end up in this directory.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it has not been built yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Note that a multilingual whisper model is required: for example, tiny works
# while tiny.en does not.
# https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
  rm sherpa-onnx-whisper-tiny.tar.bz2
fi

# Download the test wave files used by the example.
if [ ! -f ./spoken-language-identification-test-wavs/en-english.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2
  tar xvf spoken-language-identification-test-wavs.tar.bz2
  rm spoken-language-identification-test-wavs.tar.bz2
fi

# $PWD is quoted so that a checkout path containing spaces still works.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./SpokenLanguageIdentificationWhisper.java
4 changes: 4 additions & 0 deletions sherpa-onnx/java-api/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ java_files += OfflineTtsConfig.java
java_files += GeneratedAudio.java
java_files += OfflineTts.java

java_files += SpokenLanguageIdentificationWhisperConfig.java
java_files += SpokenLanguageIdentificationConfig.java
java_files += SpokenLanguageIdentification.java

class_files := $(java_files:%.java=%.class)

java_files := $(addprefix src/$(package_dir)/,$(java_files))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * Java wrapper around the spoken language identification functions exported by
 * the {@code sherpa-onnx-jni} native library.
 *
 * <p>Typical usage: construct an instance from a configuration, obtain a stream
 * with {@link #createStream()}, feed audio into the stream, call
 * {@link #compute(OfflineStream)}, and finally call {@link #release()}.
 *
 * <p>After {@link #release()} has been called the instance must not be used
 * any more; {@link #compute(OfflineStream)} and {@link #createStream()} then
 * throw {@link IllegalStateException}.
 */
public class SpokenLanguageIdentification {
  static {
    System.loadLibrary("sherpa-onnx-jni");
  }

  // Maps an ISO language code (as returned by Locale.getISOLanguages()) to the
  // display name of that language in the JVM's default locale.
  private final Map<String, String> localeMap;

  // Handle returned by newFromFile(); reset to 0 once release() has run.
  private long ptr = 0;

  /**
   * Creates the native identifier from {@code config} and pre-computes the
   * language-code to display-name table used by {@link #compute(OfflineStream)}.
   *
   * @param config configuration handed to the native library
   */
  public SpokenLanguageIdentification(SpokenLanguageIdentificationConfig config) {
    ptr = newFromFile(config);

    String[] languages = Locale.getISOLanguages();
    localeMap = new HashMap<String, String>(languages.length);
    for (String language : languages) {
      Locale locale = new Locale(language);
      localeMap.put(language, locale.getDisplayName());
    }
  }

  /**
   * Identifies the language of the audio held by {@code stream}.
   *
   * @param stream a stream obtained from {@link #createStream()}
   * @return the display name of the detected language when the value returned
   *     by the native library is a known ISO language code; otherwise the
   *     native value unchanged
   * @throws IllegalStateException if this object has no native handle, e.g.
   *     because {@link #release()} was already called
   */
  public String compute(OfflineStream stream) {
    ensureNativeHandle();
    String lang = compute(ptr, stream.getPtr());
    return localeMap.getOrDefault(lang, lang);
  }

  /**
   * Creates a new stream bound to this identifier.
   *
   * @return a new {@link OfflineStream} wrapping the native stream handle
   * @throws IllegalStateException if this object has no native handle, e.g.
   *     because {@link #release()} was already called
   */
  public OfflineStream createStream() {
    ensureNativeHandle();
    long p = createStream(ptr);
    return new OfflineStream(p);
  }

  /** Safety net that releases the native handle if the caller forgot to. */
  @Override
  protected void finalize() throws Throwable {
    try {
      release();
    } finally {
      // Always give the superclass a chance to finalize as well.
      super.finalize();
    }
  }

  /**
   * Releases the native handle. Prefer calling this explicitly as soon as the
   * object is no longer needed rather than relying on finalization. Calling it
   * more than once is harmless.
   */
  public void release() {
    if (this.ptr == 0) {
      return;
    }
    delete(this.ptr);
    this.ptr = 0;
  }

  // Fails fast in Java instead of handing a zero handle to native code.
  private void ensureNativeHandle() {
    if (this.ptr == 0) {
      throw new IllegalStateException(
          "SpokenLanguageIdentification has no native handle; was release() already called?");
    }
  }

  private native void delete(long ptr);

  private native long newFromFile(SpokenLanguageIdentificationConfig config);

  private native long createStream(long ptr);

  private native String compute(long ptr, long streamPtr);
}
Loading