diff --git a/HACKING.md b/HACKING.md index 937db693..5b76c2a8 100644 --- a/HACKING.md +++ b/HACKING.md @@ -31,10 +31,21 @@ on Raspbian 2017-07-05 and later. You'll also need to configure ALSA: ``` shell sudo scripts/configure-driver.sh +sudo reboot +``` + +After your Pi has rebooted with the driver enabled, run: + +``` +cd ~/AIY-projects-python sudo scripts/install-alsa-config.sh +python3 checkpoints/check_audio.py sudo reboot ``` +Don't skip running `check_audio.py` before rebooting, as it has an important +effect on the state of ALSA, the sound architecture. + ## Get cloud credentials To access the cloud services you need to register a project and generate diff --git a/src/aiy/_drivers/_led.py b/src/aiy/_drivers/_led.py index 9578a1c2..44951723 100644 --- a/src/aiy/_drivers/_led.py +++ b/src/aiy/_drivers/_led.py @@ -90,7 +90,7 @@ def _animate(self): running = self.running if not running: return - if state: + if state is not None: if not self._parse_state(state): raise ValueError('unsupported state: %d' % state) if self.iterator: diff --git a/src/aiy/_drivers/_status_ui.py b/src/aiy/_drivers/_status_ui.py index c6be6256..64556729 100644 --- a/src/aiy/_drivers/_status_ui.py +++ b/src/aiy/_drivers/_status_ui.py @@ -53,6 +53,7 @@ def set_trigger_sound_wave(self, trigger_sound_wave): """ if not trigger_sound_wave: self._trigger_sound_wave = None + return expanded_path = os.path.expanduser(trigger_sound_wave) if os.path.exists(expanded_path): self._trigger_sound_wave = expanded_path diff --git a/src/aiy/_drivers/_tts.py b/src/aiy/_drivers/_tts.py index a941b479..c223479b 100644 --- a/src/aiy/_drivers/_tts.py +++ b/src/aiy/_drivers/_tts.py @@ -33,13 +33,15 @@ def create_say(player): return functools.partial(say, player, lang=lang) -def say(player, words, lang='en-US'): +def say(player, words, lang='en-US', volume=60, pitch=130): """Say the given words with TTS. Args: player: To play the text-to-speech audio. words: string to say aloud. lang: language for the text-to-speech engine. + volume: volume for the text-to-speech engine. + pitch: pitch for the text-to-speech engine. """ try: (fd, tts_wav) = tempfile.mkstemp(suffix='.wav', dir=TMP_DIR) @@ -47,7 +49,8 @@ def say(player, words, lang='en-US'): logger.exception('Using fallback directory for TTS output') (fd, tts_wav) = tempfile.mkstemp(suffix='.wav') os.close(fd) - words = '%s' % words + words = '' + words + '' try: subprocess.call(['pico2wave', '--lang', lang, '-w', tts_wav, words]) player.play_wav(tts_wav) diff --git a/src/aiy/audio.py b/src/aiy/audio.py index 7711acc7..7f766bbd 100644 --- a/src/aiy/audio.py +++ b/src/aiy/audio.py @@ -28,6 +28,8 @@ _voicehat_recorder = None _voicehat_player = None _status_ui = None +_tts_volume = 60 +_tts_pitch = 130 class _WaveDump(object): @@ -108,15 +110,24 @@ def play_audio(audio_data): player.play_bytes(audio_data, sample_width=AUDIO_SAMPLE_SIZE, sample_rate=AUDIO_SAMPLE_RATE_HZ) -def say(words, lang=None): +def say(words, lang=None, volume=None, pitch=None): """Says the given words in the given language with Google TTS engine. - If lang is specified, e.g. "en-US', it will be used to say the given words. + If lang is specified, e.g. "en-US", it will be used to say the given words. Otherwise, the language from aiy.i18n will be used. + volume (optional) volume used to say the given words. + pitch (optional) pitch to say the given words. + Example: aiy.audio.say('This is an example', lang="en-US", volume=75, pitch=135) + Any of the optional variables can be left out. """ + if not lang: lang = aiy.i18n.get_language_code() - aiy._drivers._tts.say(aiy.audio.get_player(), words, lang=lang) + if not volume: + volume = aiy.audio.get_tts_volume() + if not pitch: + pitch = aiy.audio.get_tts_pitch() + aiy._drivers._tts.say(aiy.audio.get_player(), words, lang=lang, volume=volume, pitch=pitch) def get_status_ui(): @@ -129,3 +140,23 @@ def get_status_ui(): if not _status_ui: _status_ui = aiy._drivers._StatusUi() return _status_ui + + +def set_tts_volume(volume): + global _tts_volume + _tts_volume = volume + + +def get_tts_volume(): + global _tts_volume + return _tts_volume + + +def set_tts_pitch(pitch): + global _tts_pitch + _tts_pitch = pitch + + +def get_tts_pitch(): + global _tts_pitch + return _tts_pitch diff --git a/src/aiy/cloudspeech.py b/src/aiy/cloudspeech.py index c38ea54d..184ed9b5 100644 --- a/src/aiy/cloudspeech.py +++ b/src/aiy/cloudspeech.py @@ -34,17 +34,51 @@ class _CloudSpeechRecognizer(object): def __init__(self, credentials_file): self._request = aiy._apis._speech.CloudSpeechRequest(credentials_file) self._recorder = aiy.audio.get_recorder() + self._hotwords = [] def recognize(self): """Recognizes the user's speech and transcript it into text. This function listens to the user's speech via the VoiceHat speaker. Then it contacts Google CloudSpeech APIs and returns a textual transcript if possible. + If hotword list is populated this method will only respond if hotword is said. """ self._request.reset() self._request.set_endpointer_cb(self._endpointer_callback) self._recorder.add_processor(self._request) - return self._request.do_request().transcript + text = self._request.do_request().transcript + if self._hotwords and text: + text = text.lower() + loc_min = len(text) + hotword_found = '' + for hotword in self._hotwords: + loc_temp = text.find(hotword) + if loc_temp > -1 and loc_min > loc_temp: + loc_min = loc_temp + hotword_found = hotword + if hotword_found: + parse_text = text.split(hotword_found)[1] + return parse_text.strip() + else: + return '' + else: + return '' if self._hotwords else text + + def expect_hotword(self, hotword_list): + """Enables hotword detection for a selected list + This method is optional and populates the list of hotwords + to be used for hotword activation. + + For example, to create a recognizer for Google: + + recognizer.expect_hotword('Google') + recognizer.expect_hotword(['Google','Raspberry Pi']) + """ + if isinstance(hotword_list, list): + for hotword in hotword_list: + self._hotwords.append(hotword.lower()) + else: + self._hotwords.append(hotword_list.lower()) def expect_phrase(self, phrase): """Explicitly tells the engine that the phrase is more likely to appear.