diff --git a/DCS-SR-ExternalAudio/Audio/AudioGenerator.cs b/DCS-SR-ExternalAudio/Audio/AudioGenerator.cs
index 974e097fc..e2589deba 100644
--- a/DCS-SR-ExternalAudio/Audio/AudioGenerator.cs
+++ b/DCS-SR-ExternalAudio/Audio/AudioGenerator.cs
@@ -18,6 +18,10 @@ using Google.Cloud.TextToSpeech.V1;
 using Grpc.Core;
+using Microsoft.CognitiveServices.Speech;
+using Microsoft.CognitiveServices.Speech.Audio;
+using Ciribob.DCS.SimpleRadio.Standalone.Common.Network;
+
 namespace Ciribob.DCS.SimpleRadio.Standalone.ExternalAudioClient.Audio
 {
@@ -116,7 +120,7 @@ private byte[] GoogleTTS(string msg)
                     }
                 }
-                AudioConfig config = new AudioConfig
+                Google.Cloud.TextToSpeech.V1.AudioConfig config = new Google.Cloud.TextToSpeech.V1.AudioConfig
                 {
                     AudioEncoding = AudioEncoding.Linear16,
                     SampleRateHertz = INPUT_SAMPLE_RATE
@@ -155,14 +159,107 @@ private byte[] GoogleTTS(string msg)
             }
             return new byte[0];
         }
-        
+
+        /// <summary>
+        /// Synthesises <paramref name="msg"/> with Azure AI Speech and returns the audio bytes read back from the generated WAV file.
+        /// </summary>
+        /// <param name="msg">Text to speak.</param>
+        /// <returns>
+        /// The bytes read by <c>WaveFileReader</c>, or an empty array when the credentials are malformed,
+        /// synthesis does not complete, or any exception is thrown (failures are logged, never rethrown).
+        /// </returns>
+        private async Task<byte[]> AzureTTS(string msg)
+        {
+            string tempFile = null;
+            try
+            {
+                // opts.AzureCredentials is expected as "<speech key>;<region>".
+                string[] connstring = (opts.AzureCredentials ?? string.Empty).Split(';');
+                if (connstring.Length < 2
+                    || string.IsNullOrWhiteSpace(connstring[0])
+                    || string.IsNullOrWhiteSpace(connstring[1]))
+                {
+                    Logger.Error("Error with Azure Text to Speech: credentials must be in the form \"key;region\"");
+                    return new byte[0];
+                }
+
+                string speechKey = connstring[0].Trim();
+                string speechRegion = connstring[1].Trim();
+
+                var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
+
+                // The voice is optional (LocalTTS treats a null/empty opts.Voice as "not set"), so only
+                // configure it when supplied; otherwise the service picks its own default voice.
+                if (!string.IsNullOrEmpty(opts.Voice))
+                {
+                    // Azure voice names begin with their locale, e.g. "en-US-AvaMultilingualNeural".
+                    if (opts.Voice.Length >= 5)
+                    {
+                        speechConfig.SpeechSynthesisLanguage = opts.Voice.Substring(0, 5);
+                    }
+
+                    speechConfig.SpeechSynthesisVoiceName = opts.Voice;
+                }
+
+                // NOTE(review): the output format is left at the SDK default - confirm it matches INPUT_SAMPLE_RATE.
+                // A null AudioConfig keeps the audio in the result instead of playing it on the default speaker.
+                // ConfigureAwait(false): GetOpusBytes blocks on this task, so never resume on a captured context.
+                using (var speechSynthesizer = new Microsoft.CognitiveServices.Speech.SpeechSynthesizer(speechConfig, null))
+                using (var result = await speechSynthesizer.SpeakTextAsync(msg).ConfigureAwait(false))
+                {
+                    if (result.Reason != ResultReason.SynthesizingAudioCompleted)
+                    {
+                        string details = result.Reason == ResultReason.Canceled
+                            ? SpeechSynthesisCancellationDetails.FromResult(result).ErrorDetails
+                            : result.Reason.ToString();
+                        Logger.Error($"Error with Azure Text to Speech: {details}");
+                        return new byte[0];
+                    }
+
+                    using (var stream = AudioDataStream.FromResult(result))
+                    {
+                        tempFile = Path.GetTempFileName();
+                        await stream.SaveToWaveFileAsync(tempFile).ConfigureAwait(false);
+                    }
+                }
+
+                using (var reader = new WaveFileReader(tempFile))
+                {
+                    byte[] bytes = new byte[reader.Length];
+                    var read = reader.Read(bytes, 0, bytes.Length);
+                    Logger.Info($"Success with Azure TTS - read {read} bytes");
+                    return bytes;
+                }
+            }
+            catch (Exception ex)
+            {
+                Logger.Error(ex, $"Error with Azure Text to Speech: {ex.Message}");
+            }
+            finally
+            {
+                // Path.GetTempFileName creates the file on disk, so remove it on every exit path.
+                if (tempFile != null)
+                {
+                    try
+                    {
+                        File.Delete(tempFile);
+                    }
+                    catch (Exception ex)
+                    {
+                        Logger.Error(ex, $"Unable to delete Azure TTS temp file: {ex.Message}");
+                    }
+                }
+            }
+
+            return new byte[0];
+        }
         private byte[] LocalTTS(string msg)
         {
             try
             {
-                using (var synth = new SpeechSynthesizer())
+                using (var synth = new System.Speech.Synthesis.SpeechSynthesizer())
                 using (var stream = new MemoryStream())
                 {
                     if (opts.Voice == null || opts.Voice.Length == 0)
@@ -298,6 +395,10 @@ public List GetOpusBytes()
             {
                 resampledBytes = GoogleTTS(msg);
             }
+            else if (!string.IsNullOrEmpty(opts.AzureCredentials))
+            {
+                resampledBytes = AzureTTS(msg).GetAwaiter().GetResult();
+            }
             else
             {
                 resampledBytes = LocalTTS(msg);
diff --git a/DCS-SR-ExternalAudio/Client/Program.cs b/DCS-SR-ExternalAudio/Client/Program.cs
index 01e00426a..833735df2 100644
--- a/DCS-SR-ExternalAudio/Client/Program.cs
+++ b/DCS-SR-ExternalAudio/Client/Program.cs
@@ -156,6 +156,11 @@ public class Options
                 Required = false)]
             public string GoogleCredentials { get; set; }
+            [Option('a', "azureCredentials",
+                HelpText = "Use your Azure AI Speech key and region, separated by semicolon",
+                Required = false)]
+            public string AzureCredentials { get; set; }
+
             [Option('L',
"latitude",
                 HelpText = "Latitude of transmission",
                 Required = false, Default = 0.0)]
@@ -272,6 +277,8 @@ private static void HandleParseError(IEnumerable errs)
                 Console.WriteLine($"Example:\n --text=\"I want any female voice with a German accent \" --freqs=251.0 --modulations=AM --coalition=1 --gender=male --locale=de-DE --googleCredentials=\"C:\\\\folder\\\\credentials.json\" ");
+                Console.WriteLine($"Example:\n --text=\"I want any female voice with a German accent \" --freqs=251.0 --modulations=AM --coalition=1 --gender=male --locale=de-DE --azureCredentials=\"xxxxxxxxxxxxxxxxxx;westeurope\" ");
+
             }
diff --git a/DCS-SR-ExternalAudio/DCS-SR-ExternalAudio.csproj b/DCS-SR-ExternalAudio/DCS-SR-ExternalAudio.csproj
index 56793d64c..7dbc7bbab 100644
--- a/DCS-SR-ExternalAudio/DCS-SR-ExternalAudio.csproj
+++ b/DCS-SR-ExternalAudio/DCS-SR-ExternalAudio.csproj
@@ -116,6 +116,9 @@
 ..\packages\Microsoft.Bcl.AsyncInterfaces.6.0.0\lib\net461\Microsoft.Bcl.AsyncInterfaces.dll + + ..\packages\Microsoft.CognitiveServices.Speech.1.40.0\lib\net462\Microsoft.CognitiveServices.Speech.csharp.dll + ..\packages\Microsoft.Win32.Primitives.4.3.0\lib\net46\Microsoft.Win32.Primitives.dll True
@@ -336,9 +339,11 @@
 + +
\ No newline at end of file
diff --git a/DCS-SR-ExternalAudio/packages.config b/DCS-SR-ExternalAudio/packages.config
index 4c414244c..2479c087e 100644
--- a/DCS-SR-ExternalAudio/packages.config
+++ b/DCS-SR-ExternalAudio/packages.config
@@ -17,6 +17,7 @@
 +