diff --git a/.github/workflows/aarch64-linux-gnu.yaml b/.github/workflows/aarch64-linux-gnu.yaml index 1a42e6100..d50d70e78 100644 --- a/.github/workflows/aarch64-linux-gnu.yaml +++ b/.github/workflows/aarch64-linux-gnu.yaml @@ -22,9 +22,6 @@ on: - 'cmake/**' - 'sherpa-onnx/csrc/*' - 'toolchains/aarch64-linux-gnu.toolchain.cmake' - release: - types: - - published workflow_dispatch: inputs: diff --git a/.github/workflows/build-xcframework.yaml b/.github/workflows/build-xcframework.yaml index 8c2ffaa09..785d94d61 100644 --- a/.github/workflows/build-xcframework.yaml +++ b/.github/workflows/build-xcframework.yaml @@ -9,9 +9,6 @@ on: - './build-ios.sh' tags: - '*' - release: - types: - - published workflow_dispatch: diff --git a/CMakeLists.txt b/CMakeLists.txt index 5c0a7c19b..c477d3232 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR) project(sherpa-onnx) -set(SHERPA_ONNX_VERSION "1.7.19") +set(SHERPA_ONNX_VERSION "1.7.20") # Disable warning about # diff --git a/android/.gitignore b/android/.gitignore new file mode 100644 index 000000000..76f257c55 --- /dev/null +++ b/android/.gitignore @@ -0,0 +1,34 @@ +# Gradle files +.gradle/ +build/ + +# Local configuration file (sdk path, etc) +local.properties + +# Log/OS Files +*.log + +# Android Studio generated files and folders +captures/ +.externalNativeBuild/ +.cxx/ +*.apk +output.json + +# IntelliJ +*.iml +.idea/ +misc.xml +deploymentTargetDropDown.xml +render.experimental.xml + +# Keystore files +*.jks +*.keystore + +# Google Services (e.g. APIs or Firebase) +google-services.json + +# Android Profiling +*.hprof +*.so diff --git a/android/README.md b/android/README.md index 4e6e3c99c..16489dbea 100644 --- a/android/README.md +++ b/android/README.md @@ -3,3 +3,13 @@ Please refer to https://k2-fsa.github.io/sherpa/onnx/android/index.html for usage. + +- [SherpaOnnx](./SherpaOnnx) It uses a streaming ASR model. 
+ +- [SherpaOnnx2Pass](./SherpaOnnx2Pass) It uses a streaming ASR model + for the first pass and uses a non-streaming ASR model for the second pass. + +- [SherpaOnnxVad](./SherpaOnnxVad) It demonstrates how to use a VAD. + +- [SherpaOnnxVadAsr](./SherpaOnnxVadAsr) It uses a VAD with a non-streaming + ASR model. diff --git a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt index 99ca65827..9198d7dc0 100644 --- a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt +++ b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt @@ -53,6 +53,8 @@ data class OnlineRecognizerConfig( var enableEndpoint: Boolean = true, var decodingMethod: String = "greedy_search", var maxActivePaths: Int = 4, + var hotwordsFile: String = "", + var hotwordsScore: Float = 1.5f, ) data class OfflineTransducerModelConfig( @@ -87,6 +89,8 @@ data class OfflineRecognizerConfig( // var lmConfig: OfflineLMConfig(), // TODO(fangjun): enable it var decodingMethod: String = "greedy_search", var maxActivePaths: Int = 4, + var hotwordsFile: String = "", + var hotwordsScore: Float = 1.5f, ) class SherpaOnnx( @@ -370,6 +374,19 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? 
{ ) } + 5 -> { + val modelDir = "sherpa-onnx-zipformer-multi-zh-hans-2023-9-2" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder-epoch-20-avg-1.int8.onnx", + decoder = "$modelDir/decoder-epoch-20-avg-1.onnx", + joiner = "$modelDir/joiner-epoch-20-avg-1.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "zipformer2", + ) + } + } return null } diff --git a/android/SherpaOnnxVadAsr/.gitignore b/android/SherpaOnnxVadAsr/.gitignore new file mode 100644 index 000000000..aa724b770 --- /dev/null +++ b/android/SherpaOnnxVadAsr/.gitignore @@ -0,0 +1,15 @@ +*.iml +.gradle +/local.properties +/.idea/caches +/.idea/libraries +/.idea/modules.xml +/.idea/workspace.xml +/.idea/navEditor.xml +/.idea/assetWizardSettings.xml +.DS_Store +/build +/captures +.externalNativeBuild +.cxx +local.properties diff --git a/android/SherpaOnnxVadAsr/app/.gitignore b/android/SherpaOnnxVadAsr/app/.gitignore new file mode 100644 index 000000000..42afabfd2 --- /dev/null +++ b/android/SherpaOnnxVadAsr/app/.gitignore @@ -0,0 +1 @@ +/build \ No newline at end of file diff --git a/android/SherpaOnnxVadAsr/app/build.gradle b/android/SherpaOnnxVadAsr/app/build.gradle new file mode 100644 index 000000000..c9b51a6fd --- /dev/null +++ b/android/SherpaOnnxVadAsr/app/build.gradle @@ -0,0 +1,44 @@ +plugins { + id 'com.android.application' + id 'org.jetbrains.kotlin.android' +} + +android { + namespace 'com.k2fsa.sherpa.onnx' + compileSdk 33 + + defaultConfig { + applicationId "com.k2fsa.sherpa.onnx" + minSdk 21 + targetSdk 33 + versionCode 1 + versionName "1.0" + + testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" + } + + buildTypes { + release { + minifyEnabled false + proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' + } + } + compileOptions { + sourceCompatibility JavaVersion.VERSION_1_8 + targetCompatibility JavaVersion.VERSION_1_8 + } + kotlinOptions { + jvmTarget = '1.8' + } +} 
+ +dependencies { + + implementation 'androidx.core:core-ktx:1.7.0' + implementation 'androidx.appcompat:appcompat:1.6.1' + implementation 'com.google.android.material:material:1.9.0' + implementation 'androidx.constraintlayout:constraintlayout:2.1.4' + testImplementation 'junit:junit:4.13.2' + androidTestImplementation 'androidx.test.ext:junit:1.1.5' + androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.1' +} \ No newline at end of file diff --git a/android/SherpaOnnxVadAsr/app/proguard-rules.pro b/android/SherpaOnnxVadAsr/app/proguard-rules.pro new file mode 100644 index 000000000..481bb4348 --- /dev/null +++ b/android/SherpaOnnxVadAsr/app/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. +# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. 
+#-renamesourcefileattribute SourceFile \ No newline at end of file diff --git a/android/SherpaOnnxVadAsr/app/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt b/android/SherpaOnnxVadAsr/app/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt new file mode 100644 index 000000000..183383202 --- /dev/null +++ b/android/SherpaOnnxVadAsr/app/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt @@ -0,0 +1,24 @@ +package com.k2fsa.sherpa.onnx + +import androidx.test.platform.app.InstrumentationRegistry +import androidx.test.ext.junit.runners.AndroidJUnit4 + +import org.junit.Test +import org.junit.runner.RunWith + +import org.junit.Assert.* + +/** + * Instrumented test, which will execute on an Android device. + * + * See [testing documentation](http://d.android.com/tools/testing). + */ +@RunWith(AndroidJUnit4::class) +class ExampleInstrumentedTest { + @Test + fun useAppContext() { + // Context of the app under test. + val appContext = InstrumentationRegistry.getInstrumentation().targetContext + assertEquals("com.k2fsa.sherpa.onnx", appContext.packageName) + } +} \ No newline at end of file diff --git a/android/SherpaOnnxVadAsr/app/src/main/AndroidManifest.xml b/android/SherpaOnnxVadAsr/app/src/main/AndroidManifest.xml new file mode 100644 index 000000000..986a17d50 --- /dev/null +++ b/android/SherpaOnnxVadAsr/app/src/main/AndroidManifest.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxVadAsr/app/src/main/assets/.gitignore b/android/SherpaOnnxVadAsr/app/src/main/assets/.gitignore new file mode 100644 index 000000000..e1a699ac3 --- /dev/null +++ b/android/SherpaOnnxVadAsr/app/src/main/assets/.gitignore @@ -0,0 +1 @@ +*.onnx diff --git a/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt new file mode 100644 index 000000000..6668bb37d --- 
/dev/null +++ b/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt @@ -0,0 +1,213 @@ +package com.k2fsa.sherpa.onnx + +import android.Manifest +import android.content.pm.PackageManager +import android.media.AudioFormat +import android.media.AudioRecord +import android.media.MediaRecorder +import android.os.Bundle +import android.text.method.ScrollingMovementMethod +import android.util.Log +import android.view.View +import android.widget.Button +import android.widget.TextView +import androidx.appcompat.app.AppCompatActivity +import androidx.core.app.ActivityCompat +import kotlin.concurrent.thread + + +private const val TAG = "sherpa-onnx" +private const val REQUEST_RECORD_AUDIO_PERMISSION = 200 + +class MainActivity : AppCompatActivity() { + + private lateinit var recordButton: Button + private lateinit var textView: TextView + + private lateinit var vad: Vad + + private var audioRecord: AudioRecord? = null + private var recordingThread: Thread? = null + private val audioSource = MediaRecorder.AudioSource.MIC + private val sampleRateInHz = 16000 + private val channelConfig = AudioFormat.CHANNEL_IN_MONO + + // Note: We don't use AudioFormat.ENCODING_PCM_FLOAT + // since the AudioRecord.read(float[]) needs API level >= 23 + // but we are targeting API level >= 21 + private val audioFormat = AudioFormat.ENCODING_PCM_16BIT + + private val permissions: Array = arrayOf(Manifest.permission.RECORD_AUDIO) + + // Non-streaming ASR + private lateinit var offlineRecognizer: SherpaOnnxOffline + + private var idx: Int = 0 + private var lastText: String = "" + + @Volatile + private var isRecording: Boolean = false + + override fun onRequestPermissionsResult( + requestCode: Int, permissions: Array, grantResults: IntArray + ) { + super.onRequestPermissionsResult(requestCode, permissions, grantResults) + val permissionToRecordAccepted = if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) { + grantResults[0] == PackageManager.PERMISSION_GRANTED + } else { 
+ false + } + + if (!permissionToRecordAccepted) { + Log.e(TAG, "Audio record is disallowed") + finish() + } + + Log.i(TAG, "Audio record is permitted") + } + + override fun onCreate(savedInstanceState: Bundle?) { + super.onCreate(savedInstanceState) + setContentView(R.layout.activity_main) + + ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION) + + Log.i(TAG, "Start to initialize model") + initVadModel() + Log.i(TAG, "Finished initializing model") + + Log.i(TAG, "Start to initialize non-streaimng recognizer") + initOfflineRecognizer() + Log.i(TAG, "Finished initializing non-streaming recognizer") + + recordButton = findViewById(R.id.record_button) + recordButton.setOnClickListener { onclick() } + + textView = findViewById(R.id.my_text) + textView.movementMethod = ScrollingMovementMethod() + } + + private fun onclick() { + if (!isRecording) { + val ret = initMicrophone() + if (!ret) { + Log.e(TAG, "Failed to initialize microphone") + return + } + Log.i(TAG, "state: ${audioRecord?.state}") + audioRecord!!.startRecording() + recordButton.setText(R.string.stop) + isRecording = true + + textView.text = "" + lastText = "" + idx = 0 + + vad.reset() + recordingThread = thread(true) { + processSamples() + } + Log.i(TAG, "Started recording") + } else { + isRecording = false + + audioRecord!!.stop() + audioRecord!!.release() + audioRecord = null + + recordButton.setText(R.string.start) + Log.i(TAG, "Stopped recording") + } + } + + private fun initVadModel() { + val type = 0 + println("Select VAD model type ${type}") + val config = getVadModelConfig(type) + + vad = Vad( + assetManager = application.assets, + config = config!!, + ) + } + + private fun initMicrophone(): Boolean { + if (ActivityCompat.checkSelfPermission( + this, Manifest.permission.RECORD_AUDIO + ) != PackageManager.PERMISSION_GRANTED + ) { + ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION) + return false + } + + val numBytes = 
AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat) + Log.i( + TAG, "buffer size in milliseconds: ${numBytes * 1000.0f / sampleRateInHz}" + ) + + audioRecord = AudioRecord( + audioSource, + sampleRateInHz, + channelConfig, + audioFormat, + numBytes * 2 // a sample has two bytes as we are using 16-bit PCM + ) + return true + } + + private fun processSamples() { + Log.i(TAG, "processing samples") + + val bufferSize = 512 // in samples + val buffer = ShortArray(bufferSize) + + while (isRecording) { + val ret = audioRecord?.read(buffer, 0, buffer.size) + if (ret != null && ret > 0) { + val samples = FloatArray(ret) { buffer[it] / 32768.0f } + + vad.acceptWaveform(samples) + while(!vad.empty()) { + var objArray = vad.front() + val samples = objArray[1] as FloatArray + val text = runSecondPass(samples) + + if (text.isNotBlank()) { + lastText = "${lastText}\n${idx}: ${text}" + idx += 1 + } + + vad.pop(); + } + + val isSpeechDetected = vad.isSpeechDetected() + + runOnUiThread { + textView.text = lastText.lowercase() + } + } + } + } + + private fun initOfflineRecognizer() { + // Please change getOfflineModelConfig() to add new models + // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html + // for a list of available models + val secondType = 0 + println("Select model type ${secondType} for the second pass") + + val config = OfflineRecognizerConfig( + featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), + modelConfig = getOfflineModelConfig(type = secondType)!!, + ) + + offlineRecognizer = SherpaOnnxOffline( + assetManager = application.assets, + config = config, + ) + } + + private fun runSecondPass(samples: FloatArray): String { + return offlineRecognizer.decode(samples, sampleRateInHz) + } +} \ No newline at end of file diff --git a/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt b/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt new file mode 
120000 index 000000000..57ba3e85a --- /dev/null +++ b/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt @@ -0,0 +1 @@ +../../../../../../../../../SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt \ No newline at end of file diff --git a/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt b/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt new file mode 120000 index 000000000..f430a1056 --- /dev/null +++ b/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt @@ -0,0 +1 @@ +../../../../../../../../../SherpaOnnxVad/app/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt \ No newline at end of file diff --git a/android/SherpaOnnxVadAsr/app/src/main/jniLibs/arm64-v8a/.gitkeep b/android/SherpaOnnxVadAsr/app/src/main/jniLibs/arm64-v8a/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/android/SherpaOnnxVadAsr/app/src/main/jniLibs/armeabi-v7a/.gitkeep b/android/SherpaOnnxVadAsr/app/src/main/jniLibs/armeabi-v7a/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/android/SherpaOnnxVadAsr/app/src/main/jniLibs/x86/.gitkeep b/android/SherpaOnnxVadAsr/app/src/main/jniLibs/x86/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/android/SherpaOnnxVadAsr/app/src/main/jniLibs/x86_64/.gitkeep b/android/SherpaOnnxVadAsr/app/src/main/jniLibs/x86_64/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/android/SherpaOnnxVadAsr/app/src/main/res/drawable-v24/ic_launcher_foreground.xml b/android/SherpaOnnxVadAsr/app/src/main/res/drawable-v24/ic_launcher_foreground.xml new file mode 100644 index 000000000..2b068d114 --- /dev/null +++ b/android/SherpaOnnxVadAsr/app/src/main/res/drawable-v24/ic_launcher_foreground.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxVadAsr/app/src/main/res/drawable/ic_launcher_background.xml 
b/android/SherpaOnnxVadAsr/app/src/main/res/drawable/ic_launcher_background.xml new file mode 100644 index 000000000..07d5da9cb --- /dev/null +++ b/android/SherpaOnnxVadAsr/app/src/main/res/drawable/ic_launcher_background.xml @@ -0,0 +1,170 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/android/SherpaOnnxVadAsr/app/src/main/res/layout/activity_main.xml b/android/SherpaOnnxVadAsr/app/src/main/res/layout/activity_main.xml new file mode 100644 index 000000000..f9b35e862 --- /dev/null +++ b/android/SherpaOnnxVadAsr/app/src/main/res/layout/activity_main.xml @@ -0,0 +1,39 @@ + + + + + + + +