This repository has been archived by the owner on Jan 16, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 605
/
Copy pathSampleApplication.cpp
387 lines (335 loc) · 16.1 KB
/
SampleApplication.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
/*
* SampleApplication.cpp
*
* Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://aws.amazon.com/apache2.0/
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#include "SampleApp/KeywordObserver.h"
#include "SampleApp/ConnectionObserver.h"
#include "SampleApp/SampleApplication.h"
#ifdef KWD_KITTAI
#include <KittAi/KittAiKeyWordDetector.h>
#elif KWD_SENSORY
#include <Sensory/SensoryKeywordDetector.h>
#endif
#include <Alerts/Storage/SQLiteAlertStorage.h>
#include <AuthDelegate/AuthDelegate.h>
#include <AVSCommon/AVS/Initialization/AlexaClientSDKInit.h>
#include <AVSCommon/Utils/Logger/LoggerSinkManager.h>
#include <MediaPlayer/MediaPlayer.h>
#include <algorithm>
#include <cctype>
#include <fstream>
namespace alexaClientSDK {
namespace sampleApp {
/// Sample rate, in Hz, of the microphone audio fed into the shared stream.
static const unsigned int SAMPLE_RATE_HZ = 16000;

/// Number of audio channels in the microphone audio.
static const unsigned int NUM_CHANNELS = 1;

/// Size of one word in the stream; multiplied by CHAR_BIT below to obtain the sample size in bits.
static const size_t WORD_SIZE = 2;

/// Upper bound on the number of readers attached to the stream.
static const size_t MAX_READERS = 10;

/// How much audio the ring buffer retains.
static const std::chrono::seconds AMOUNT_OF_AUDIO_DATA_IN_BUFFER(15);

/// Ring buffer capacity in samples: sample rate times the retained duration in seconds.
static const size_t BUFFER_SIZE_IN_SAMPLES = SAMPLE_RATE_HZ * AMOUNT_OF_AUDIO_DATA_IN_BUFFER.count();

#if defined(KWD_KITTAI)
/// Sensitivity passed to the Kitt.ai engine.
static const double KITT_AI_SENSITIVITY = 0.6;

/// Audio amplifier level passed to the Kitt.ai engine.
static const float KITT_AI_AUDIO_GAIN = 2.0f;

/// Whether Kitt.ai should apply front end audio processing.
static const bool KITT_AI_APPLY_FRONT_END_PROCESSING = true;
#endif
/// A set of all log levels, printed by initialize() as the list of valid options when the user supplies an
/// unrecognized log level. Because this is a std::set with the default comparator, iteration follows the ordering
/// of the Level values themselves, not the order in which they are listed here.
static const std::set<alexaClientSDK::avsCommon::utils::logger::Level> allLevels = {
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG9,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG8,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG7,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG6,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG5,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG4,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG3,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG2,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG1,
alexaClientSDK::avsCommon::utils::logger::Level::DEBUG0,
alexaClientSDK::avsCommon::utils::logger::Level::INFO,
alexaClientSDK::avsCommon::utils::logger::Level::WARN,
alexaClientSDK::avsCommon::utils::logger::Level::ERROR,
alexaClientSDK::avsCommon::utils::logger::Level::CRITICAL,
alexaClientSDK::avsCommon::utils::logger::Level::NONE
};
/**
 * Gets a log level consumable by the SDK based on the user input string for log level.
 *
 * The input is upper-cased before it is handed to convertNameToLevel(), so matching does not depend on the case
 * the user typed.
 *
 * @param userInputLogLevel The string to be parsed into a log level. Taken by value so it can be upper-cased
 * locally without affecting the caller's copy.
 * @return The level reported by convertNameToLevel() for the upper-cased input. initialize() treats
 * Level::UNKNOWN as the signal that the string could not be parsed.
 */
static alexaClientSDK::avsCommon::utils::logger::Level getLogLevelFromUserInput(std::string userInputLogLevel) {
    // std::toupper has undefined behavior for negative arguments other than EOF, so each character is received as
    // unsigned char before conversion; this keeps non-ASCII bytes in user input from triggering UB.
    std::transform(
        userInputLogLevel.begin(),
        userInputLogLevel.end(),
        userInputLogLevel.begin(),
        [](unsigned char c) { return static_cast<char>(std::toupper(c)); });
    return alexaClientSDK::avsCommon::utils::logger::convertNameToLevel(userInputLogLevel);
}
/**
 * The interface used to display messages in the console.
 *
 * TODO: g_consolePrinter is a static/global because it is passed by reference to changeSinkLogger() below,
 * which keeps a reference to it for the lifetime of the logging system. If the logging system is refactored to
 * use shared_ptrs (ACSDK-445), the ConsolePrinter can be instantiated as a shared_ptr class member and passed to
 * LoggerSinkManager.
 */
static alexaClientSDK::sampleApp::ConsolePrinter g_consolePrinter;
std::unique_ptr<SampleApplication> SampleApplication::create(
const std::string& pathToConfig,
const std::string& pathToInputFolder,
const std::string& logLevel) {
auto clientApplication = std::unique_ptr<SampleApplication>(new SampleApplication);
if (!clientApplication->initialize(pathToConfig, pathToInputFolder, logLevel)) {
ConsolePrinter::simplePrint("Failed to initialize SampleApplication");
return nullptr;
}
return clientApplication;
}
/// Hands control to the user input manager that initialize() stored in m_userInputManager; returns when its run()
/// call returns.
void SampleApplication::run() {
m_userInputManager->run();
}
/**
 * Builds and wires together everything the sample application needs: logging, SDK initialization, media players,
 * alert storage, authorization, the DefaultClient, the shared audio stream, the audio providers, the microphone
 * wrapper, the optional keyword detector, and finally the user input manager.
 *
 * Each step that can fail prints a message through ConsolePrinter and aborts initialization.
 *
 * @param pathToConfig Path to the configuration file that is opened and handed to AlexaClientSDKInit::initialize().
 * @param pathToInputFolder Folder from which the keyword detector model/resource files are loaded (only used when
 * the build enables a keyword detector).
 * @param logLevel Optional log level name; when empty the console printer's level is left untouched.
 * @return @c true if every step succeeded, @c false as soon as any step fails.
 */
bool SampleApplication::initialize(
    const std::string& pathToConfig,
    const std::string& pathToInputFolder,
    const std::string& logLevel) {
    /*
     * Set up the SDK logging system to write to the SampleApp's ConsolePrinter. Also adjust the logging level
     * if requested.
     */
    if (!logLevel.empty()) {
        auto logLevelValue = getLogLevelFromUserInput(logLevel);
        if (alexaClientSDK::avsCommon::utils::logger::Level::UNKNOWN == logLevelValue) {
            alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Unknown log level input!");
            alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Possible log level options are: ");
            for (const auto& level : allLevels) {
                alexaClientSDK::sampleApp::ConsolePrinter::simplePrint(
                    alexaClientSDK::avsCommon::utils::logger::convertLevelToName(level));
            }
            return false;
        }

        alexaClientSDK::sampleApp::ConsolePrinter::simplePrint(
            "Running app with log level: " +
            alexaClientSDK::avsCommon::utils::logger::convertLevelToName(logLevelValue));
        g_consolePrinter.setLevel(logLevelValue);
    }
    alexaClientSDK::avsCommon::utils::logger::LoggerSinkManager::instance().changeSinkLogger(g_consolePrinter);

    /*
     * This is a required step upon startup of the SDK before any modules are created. For that reason, it is being
     * called here, before creating the MediaPlayer, audio streams, DefaultClient, etc.
     */
    std::ifstream configInfile(pathToConfig);
    if (!configInfile.good()) {
        alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to read config file!");
        return false;
    }
    if (!avsCommon::avs::initialization::AlexaClientSDKInit::initialize({&configInfile})) {
        alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to initialize SDK!");
        return false;
    }

    /*
     * Creating the media players. Here, the default GStreamer based MediaPlayer is being created. However, any
     * MediaPlayer that follows the specified MediaPlayerInterface can work.
     */
    auto speakMediaPlayer = alexaClientSDK::mediaPlayer::MediaPlayer::create();
    if (!speakMediaPlayer) {
        alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create media player for speech!");
        return false;
    }

    auto audioMediaPlayer = alexaClientSDK::mediaPlayer::MediaPlayer::create();
    if (!audioMediaPlayer) {
        alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create media player for content!");
        return false;
    }

    auto alertsMediaPlayer = alexaClientSDK::mediaPlayer::MediaPlayer::create();
    if (!alertsMediaPlayer) {
        alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create media player for alerts!");
        return false;
    }

    // Creating the alert storage object to be used for rendering and storing alerts.
    auto alertStorage = std::make_shared<alexaClientSDK::capabilityAgents::alerts::storage::SQLiteAlertStorage>();

    /*
     * Creating the UI component that observes various components and prints to the console accordingly.
     */
    auto userInterfaceManager = std::make_shared<alexaClientSDK::sampleApp::UIManager>();

    /*
     * Setting up a connection observer to wait for connection and authorization prior to accepting user input at
     * startup.
     */
    auto connectionObserver = std::make_shared<alexaClientSDK::sampleApp::ConnectionObserver>();

    /*
     * Creating the AuthDelegate - this component takes care of LWA and authorization of the client. At the moment,
     * this must be done and authorization must be achieved prior to making the call to connect().
     */
    std::shared_ptr<alexaClientSDK::authDelegate::AuthDelegate> authDelegate =
        alexaClientSDK::authDelegate::AuthDelegate::create();
    // Guard against a failed creation before dereferencing, consistent with every other factory call here.
    if (!authDelegate) {
        alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create AuthDelegate!");
        return false;
    }
    authDelegate->addAuthObserver(connectionObserver);

    /*
     * Creating the DefaultClient - this component serves as an out-of-box default object that instantiates and "glues"
     * together all the modules.
     */
    std::shared_ptr<alexaClientSDK::defaultClient::DefaultClient> client =
        alexaClientSDK::defaultClient::DefaultClient::create(
            speakMediaPlayer,
            audioMediaPlayer,
            alertsMediaPlayer,
            authDelegate,
            alertStorage,
            {userInterfaceManager},
            {connectionObserver, userInterfaceManager});
    if (!client) {
        alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create default SDK client!");
        return false;
    }

    /*
     * TODO: ACSDK-384 Remove the requirement of clients having to wait for authorization before making the connect()
     * call.
     */
    if (!connectionObserver->waitFor(
            alexaClientSDK::avsCommon::sdkInterfaces::AuthObserverInterface::State::REFRESHED)) {
        alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to authorize SDK client!");
        return false;
    }

    client->connect();

    if (!connectionObserver->waitFor(
            avsCommon::sdkInterfaces::ConnectionStatusObserverInterface::Status::POST_CONNECTED)) {
        alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to connect to AVS!");
        return false;
    }

    /*
     * Creating the buffer (Shared Data Stream) that will hold user audio data. This is the main input into the SDK.
     */
    size_t bufferSize = alexaClientSDK::avsCommon::avs::AudioInputStream::calculateBufferSize(
        BUFFER_SIZE_IN_SAMPLES, WORD_SIZE, MAX_READERS);
    auto buffer = std::make_shared<alexaClientSDK::avsCommon::avs::AudioInputStream::Buffer>(bufferSize);
    std::shared_ptr<alexaClientSDK::avsCommon::avs::AudioInputStream> sharedDataStream =
        alexaClientSDK::avsCommon::avs::AudioInputStream::create(buffer, WORD_SIZE, MAX_READERS);
    if (!sharedDataStream) {
        alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create shared data stream!");
        return false;
    }

    // Describes the audio written to the shared stream, built from the file-level constants.
    alexaClientSDK::avsCommon::utils::AudioFormat compatibleAudioFormat;
    compatibleAudioFormat.sampleRateHz = SAMPLE_RATE_HZ;
    compatibleAudioFormat.sampleSizeInBits = WORD_SIZE * CHAR_BIT;
    compatibleAudioFormat.numChannels = NUM_CHANNELS;
    compatibleAudioFormat.endianness = alexaClientSDK::avsCommon::utils::AudioFormat::Endianness::LITTLE;
    compatibleAudioFormat.encoding = alexaClientSDK::avsCommon::utils::AudioFormat::Encoding::LPCM;

    /*
     * Creating each of the audio providers. An audio provider is a simple package of data consisting of the stream
     * of audio data, as well as metadata about the stream. For each of the three audio providers created here, the same
     * stream is used since this sample application will only have one microphone.
     */

    // Creating tap to talk audio provider
    bool tapAlwaysReadable = true;
    bool tapCanOverride = true;
    bool tapCanBeOverridden = true;
    alexaClientSDK::capabilityAgents::aip::AudioProvider tapToTalkAudioProvider(
        sharedDataStream,
        compatibleAudioFormat,
        alexaClientSDK::capabilityAgents::aip::ASRProfile::NEAR_FIELD,
        tapAlwaysReadable,
        tapCanOverride,
        tapCanBeOverridden);

    // Creating hold to talk audio provider
    bool holdAlwaysReadable = false;
    bool holdCanOverride = true;
    bool holdCanBeOverridden = false;
    alexaClientSDK::capabilityAgents::aip::AudioProvider holdToTalkAudioProvider(
        sharedDataStream,
        compatibleAudioFormat,
        alexaClientSDK::capabilityAgents::aip::ASRProfile::CLOSE_TALK,
        holdAlwaysReadable,
        holdCanOverride,
        holdCanBeOverridden);

    std::shared_ptr<alexaClientSDK::sampleApp::PortAudioMicrophoneWrapper> micWrapper =
        alexaClientSDK::sampleApp::PortAudioMicrophoneWrapper::create(sharedDataStream);
    if (!micWrapper) {
        alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create PortAudioMicrophoneWrapper!");
        return false;
    }

    // Creating wake word audio provider, if necessary
#ifdef KWD
    bool wakeAlwaysReadable = true;
    bool wakeCanOverride = false;
    bool wakeCanBeOverridden = true;
    alexaClientSDK::capabilityAgents::aip::AudioProvider wakeWordAudioProvider(
        sharedDataStream,
        compatibleAudioFormat,
        alexaClientSDK::capabilityAgents::aip::ASRProfile::NEAR_FIELD,
        wakeAlwaysReadable,
        wakeCanOverride,
        wakeCanBeOverridden);

    // This observer is notified any time a keyword is detected and notifies the DefaultClient to start recognizing.
    auto keywordObserver = std::make_shared<alexaClientSDK::sampleApp::KeywordObserver>(client, wakeWordAudioProvider);

#if defined(KWD_KITTAI)
    m_keywordDetector = alexaClientSDK::kwd::KittAiKeyWordDetector::create(
        sharedDataStream,
        compatibleAudioFormat,
        {keywordObserver},
        std::unordered_set<
            std::shared_ptr<alexaClientSDK::avsCommon::sdkInterfaces::KeyWordDetectorStateObserverInterface>>(),
        pathToInputFolder + "/common.res",
        {{pathToInputFolder + "/alexa.umdl", "ALEXA", KITT_AI_SENSITIVITY}},
        KITT_AI_AUDIO_GAIN,
        KITT_AI_APPLY_FRONT_END_PROCESSING);
    if (!m_keywordDetector) {
        alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create KittAiKeywWordDetector!");
        return false;
    }
#elif defined(KWD_SENSORY)
    m_keywordDetector = kwd::SensoryKeywordDetector::create(
        sharedDataStream,
        compatibleAudioFormat,
        {keywordObserver},
        std::unordered_set<
            std::shared_ptr<alexaClientSDK::avsCommon::sdkInterfaces::KeyWordDetectorStateObserverInterface>>(),
        pathToInputFolder + "/spot-alexa-rpi-31000.snsr");
    if (!m_keywordDetector) {
        alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create SensoryKeywWordDetector!");
        return false;
    }
#endif
    // If wake word is enabled, then creating the interaction manager with a wake word audio provider.
    auto interactionManager = std::make_shared<alexaClientSDK::sampleApp::InteractionManager>(
        client,
        micWrapper,
        userInterfaceManager,
        holdToTalkAudioProvider,
        tapToTalkAudioProvider,
        wakeWordAudioProvider);

#else
    // If wake word is not enabled, then creating the interaction manager without a wake word audio provider.
    auto interactionManager = std::make_shared<alexaClientSDK::sampleApp::InteractionManager>(
        client,
        micWrapper,
        userInterfaceManager,
        holdToTalkAudioProvider,
        tapToTalkAudioProvider);
#endif

    // Creating the input observer.
    m_userInputManager = alexaClientSDK::sampleApp::UserInputManager::create(interactionManager);
    if (!m_userInputManager) {
        alexaClientSDK::sampleApp::ConsolePrinter::simplePrint("Failed to create UserInputManager!");
        return false;
    }

    return true;
}
} // namespace sampleApp
} // namespace alexaClientSDK