Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Java API for text-to-speech #811

Merged
merged 5 commits into from
Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .github/workflows/run-java-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -138,3 +138,21 @@ jobs:

./run-non-streaming-decode-file-nemo.sh
rm -rf sherpa-onnx-nemo-*

# Runs the three non-streaming TTS example scripts from java-api-examples.
# After each script, the matching vits-* model directory is removed; the
# generated *.wav files are left in place.
- name: Run java test (Non-Streaming TTS)
shell: bash
run: |
cd ./java-api-examples
./run-non-streaming-tts-piper-en.sh
rm -rf vits-piper-*

./run-non-streaming-tts-coqui-de.sh
rm -rf vits-coqui-*

./run-non-streaming-tts-vits-zh.sh
rm -rf vits-zh-*

# Uploads every *.wav file found in java-api-examples as an artifact whose
# name is suffixed with the current matrix OS.
- uses: actions/upload-artifact@v4
with:
name: tts-wav-files-${{ matrix.os }}
path: java-api-examples/*.wav
50 changes: 50 additions & 0 deletions java-api-examples/NonStreamingTtsCoquiDe.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a Coqui-ai VITS German TTS model
// to convert text to speech
import com.k2fsa.sherpa.onnx.*;

/**
 * Example program: converts a fixed German sentence to speech with a Coqui-ai VITS model and
 * writes the result to {@code tts-coqui-de.wav}.
 *
 * <p>The model files are expected under {@code ./vits-coqui-de-css10/} relative to the working
 * directory.
 */
public class NonStreamingTtsCoquiDe {
  /**
   * Builds the TTS configuration, generates audio for the sample text, saves it as a wave file and
   * prints timing statistics.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    // please visit
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
    // to download model files
    String model = "./vits-coqui-de-css10/model.onnx";
    String tokens = "./vits-coqui-de-css10/tokens.txt";
    String text = "Alles hat ein Ende, nur die Wurst hat zwei.";

    OfflineTtsVitsModelConfig vitsModelConfig =
        OfflineTtsVitsModelConfig.builder().setModel(model).setTokens(tokens).build();

    OfflineTtsModelConfig modelConfig =
        OfflineTtsModelConfig.builder()
            .setVits(vitsModelConfig)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build();
    OfflineTts tts = new OfflineTts(config);

    // Release the TTS object even when generation or saving throws.
    try {
      // NOTE(review): sid is presumably the speaker ID -- confirm against OfflineTts.generate.
      int sid = 0;
      float speed = 1.0f;

      // Time only the synthesis call itself.
      long start = System.currentTimeMillis();
      GeneratedAudio audio = tts.generate(text, sid, speed);
      long stop = System.currentTimeMillis();

      float timeElapsedSeconds = (stop - start) / 1000.0f;

      // Duration of the generated audio: number of samples divided by the sample rate.
      float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
      float realTimeFactor = timeElapsedSeconds / audioDuration;

      String waveFilename = "tts-coqui-de.wav";
      audio.save(waveFilename);
      System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
      // Report the audio duration here, not the elapsed time.
      System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
      System.out.printf("-- real-time factor (RTF): %.3f\n", realTimeFactor);
      System.out.printf("-- text: %s\n", text);
      System.out.printf("-- Saved to %s\n", waveFilename);
    } finally {
      tts.release();
    }
  }
}
58 changes: 58 additions & 0 deletions java-api-examples/NonStreamingTtsPiperEn.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a piper VITS English TTS model
// to convert text to speech
import com.k2fsa.sherpa.onnx.*;

/**
 * Example program: converts a fixed English passage to speech with a piper VITS model and writes
 * the result to {@code tts-piper-en.wav}.
 *
 * <p>The model files, including the {@code espeak-ng-data} directory, are expected under
 * {@code ./vits-piper-en_GB-cori-medium/} relative to the working directory.
 */
public class NonStreamingTtsPiperEn {
  /**
   * Builds the TTS configuration, generates audio for the sample text, saves it as a wave file and
   * prints timing statistics.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    // please visit
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
    // to download model files
    String model = "./vits-piper-en_GB-cori-medium/en_GB-cori-medium.onnx";
    String tokens = "./vits-piper-en_GB-cori-medium/tokens.txt";
    String dataDir = "./vits-piper-en_GB-cori-medium/espeak-ng-data";
    String text =
        "Today as always, men fall into two groups: slaves and free men. Whoever does not have"
            + " two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a"
            + " businessman, an official, or a scholar.";

    OfflineTtsVitsModelConfig vitsModelConfig =
        OfflineTtsVitsModelConfig.builder()
            .setModel(model)
            .setTokens(tokens)
            .setDataDir(dataDir)
            .build();

    OfflineTtsModelConfig modelConfig =
        OfflineTtsModelConfig.builder()
            .setVits(vitsModelConfig)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build();
    OfflineTts tts = new OfflineTts(config);

    // Release the TTS object even when generation or saving throws.
    try {
      // NOTE(review): sid is presumably the speaker ID -- confirm against OfflineTts.generate.
      int sid = 0;
      float speed = 1.0f;

      // Time only the synthesis call itself.
      long start = System.currentTimeMillis();
      GeneratedAudio audio = tts.generate(text, sid, speed);
      long stop = System.currentTimeMillis();

      float timeElapsedSeconds = (stop - start) / 1000.0f;

      // Duration of the generated audio: number of samples divided by the sample rate.
      float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
      float realTimeFactor = timeElapsedSeconds / audioDuration;

      String waveFilename = "tts-piper-en.wav";
      audio.save(waveFilename);
      System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
      // Report the audio duration here, not the elapsed time.
      System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
      System.out.printf("-- real-time factor (RTF): %.3f\n", realTimeFactor);
      System.out.printf("-- text: %s\n", text);
      System.out.printf("-- Saved to %s\n", waveFilename);
    } finally {
      tts.release();
    }
  }
}
64 changes: 64 additions & 0 deletions java-api-examples/NonStreamingTtsVitsZh.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a VITS Chinese TTS model
// to convert text to speech.
//
// You can use https://github.com/Plachtaa/VITS-fast-fine-tuning
// to train your model
import com.k2fsa.sherpa.onnx.*;

/**
 * Example program: converts a fixed Chinese sentence to speech with a VITS model and writes the
 * result to {@code tts-vits-zh.wav}.
 *
 * <p>The model files (model, tokens, lexicon, dict directory and rule FSTs) are expected under
 * {@code ./vits-zh-hf-fanchen-C/} relative to the working directory.
 *
 * <p>The class is named after its source file, {@code NonStreamingTtsVitsZh.java}, so that it also
 * compiles with {@code javac}.
 */
public class NonStreamingTtsVitsZh {
  /**
   * Builds the TTS configuration, generates audio for the sample text, saves it as a wave file and
   * prints timing statistics.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    // please visit
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
    // to download model files
    String model = "./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx";
    String tokens = "./vits-zh-hf-fanchen-C/tokens.txt";
    String lexicon = "./vits-zh-hf-fanchen-C/lexicon.txt";
    String dictDir = "./vits-zh-hf-fanchen-C/dict";
    // Comma-separated list of rule FST files passed to the TTS config.
    String ruleFsts =
        "./vits-zh-hf-fanchen-C/phone.fst,./vits-zh-hf-fanchen-C/date.fst,./vits-zh-hf-fanchen-C/number.fst";
    String text = "有问题,请拨打110或者手机18601239876。我们的价值观是真诚热爱!";

    OfflineTtsVitsModelConfig vitsModelConfig =
        OfflineTtsVitsModelConfig.builder()
            .setModel(model)
            .setTokens(tokens)
            .setLexicon(lexicon)
            .setDictDir(dictDir)
            .build();

    OfflineTtsModelConfig modelConfig =
        OfflineTtsModelConfig.builder()
            .setVits(vitsModelConfig)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineTtsConfig config =
        OfflineTtsConfig.builder().setModel(modelConfig).setRuleFsts(ruleFsts).build();

    OfflineTts tts = new OfflineTts(config);

    // Release the TTS object even when generation or saving throws.
    try {
      // NOTE(review): sid is presumably the speaker ID -- confirm against OfflineTts.generate.
      int sid = 100;
      float speed = 1.0f;

      // Time only the synthesis call itself.
      long start = System.currentTimeMillis();
      GeneratedAudio audio = tts.generate(text, sid, speed);
      long stop = System.currentTimeMillis();

      float timeElapsedSeconds = (stop - start) / 1000.0f;

      // Duration of the generated audio: number of samples divided by the sample rate.
      float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
      float realTimeFactor = timeElapsedSeconds / audioDuration;

      String waveFilename = "tts-vits-zh.wav";
      audio.save(waveFilename);
      System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
      // Report the audio duration here, not the elapsed time.
      System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
      System.out.printf("-- real-time factor (RTF): %.3f\n", realTimeFactor);
      System.out.printf("-- text: %s\n", text);
      System.out.printf("-- Saved to %s\n", waveFilename);
    } finally {
      tts.release();
    }
  }
}
8 changes: 8 additions & 0 deletions java-api-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,11 @@ This directory contains examples for the JAVA API of sherpa-onnx.
./run-non-streaming-decode-file-whisper.sh
./run-non-streaming-decode-file-nemo.sh
```

## Non-Streaming text-to-speech

```bash
./run-non-streaming-tts-piper-en.sh
./run-non-streaming-tts-coqui-de.sh
./run-non-streaming-tts-vits-zh.sh
```
54 changes: 54 additions & 0 deletions java-api-examples/run-non-streaming-tts-coqui-de.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env bash
#
# Builds the sherpa-onnx JNI library and the Java API jar when they are
# missing, downloads the Coqui German VITS model when it is missing, and then
# runs NonStreamingTtsCoquiDe.java.
#
# All paths are relative, so run this script from the java-api-examples
# directory.

set -ex

# Build the JNI shared library unless a .dylib or .so already exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar unless it already exists.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models
if [ ! -f ./vits-coqui-de-css10/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
  tar xf vits-coqui-de-css10.tar.bz2
  rm vits-coqui-de-css10.tar.bz2
fi

# Quote the library path so a working directory containing spaces still works.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsCoquiDe.java
54 changes: 54 additions & 0 deletions java-api-examples/run-non-streaming-tts-piper-en.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env bash
#
# Builds the sherpa-onnx JNI library and the Java API jar when they are
# missing, downloads the piper English VITS model when it is missing, and then
# runs NonStreamingTtsPiperEn.java.
#
# All paths are relative, so run this script from the java-api-examples
# directory.

set -ex

# Build the JNI shared library unless a .dylib or .so already exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar unless it already exists.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models
if [ ! -f ./vits-piper-en_GB-cori-medium/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-cori-medium.tar.bz2
  tar xf vits-piper-en_GB-cori-medium.tar.bz2
  rm vits-piper-en_GB-cori-medium.tar.bz2
fi

# Quote the library path so a working directory containing spaces still works.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsPiperEn.java
54 changes: 54 additions & 0 deletions java-api-examples/run-non-streaming-tts-vits-zh.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env bash
#
# Builds the sherpa-onnx JNI library and the Java API jar when they are
# missing, downloads the Chinese VITS model when it is missing, and then runs
# NonStreamingTtsVitsZh.java.
#
# All paths are relative, so run this script from the java-api-examples
# directory.

set -ex

# Build the JNI shared library unless a .dylib or .so already exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar unless it already exists.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models
if [ ! -f ./vits-zh-hf-fanchen-C/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-hf-fanchen-C.tar.bz2
  tar xf vits-zh-hf-fanchen-C.tar.bz2
  rm vits-zh-hf-fanchen-C.tar.bz2
fi

# Quote the library path so a working directory containing spaces still works.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsVitsZh.java
6 changes: 6 additions & 0 deletions sherpa-onnx/java-api/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ java_files += OfflineRecognizerResult.java
java_files += OfflineStream.java
java_files += OfflineRecognizer.java

# Sources for the offline text-to-speech API.
java_files += OfflineTtsVitsModelConfig.java
java_files += OfflineTtsModelConfig.java
java_files += OfflineTtsConfig.java
java_files += GeneratedAudio.java
java_files += OfflineTts.java

class_files := $(java_files:%.java=%.class)

java_files := $(addprefix src/$(package_dir)/,$(java_files))
Expand Down
Loading