Skip to content

Commit

Permalink
snowboy only decodes two seconds per run
Browse files Browse the repository at this point in the history
This helps the Raspberry Pi (RPi) catch up with real-time audio processing.
  • Loading branch information
embie27 authored Dec 6, 2018
1 parent f23a4d5 commit b013364
Showing 1 changed file with 11 additions and 35 deletions.
46 changes: 11 additions & 35 deletions speech_recognition/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,50 +591,26 @@ def snowboy_wait_for_hot_word(self, snowboy_location, snowboy_hot_word_files, so
resampling_state = None

# buffers capable of holding 2 seconds of original and resampled audio (5 seconds before this commit)
five_seconds_buffer_count = int(math.ceil(5 / seconds_per_buffer))
frames = collections.deque(maxlen=five_seconds_buffer_count)
resampled_frames = collections.deque(maxlen=five_seconds_buffer_count)
started = False
pause_count = 0
pause_buffer_count = int(math.ceil(
self.pause_threshold / seconds_per_buffer)) # number of buffers of non-speaking audio during a phrase, before the phrase should be considered complete
two_seconds_buffer_count = int(math.ceil(2 / seconds_per_buffer))
frames = collections.deque(maxlen=two_seconds_buffer_count)
resampled_frames = collections.deque(maxlen=two_seconds_buffer_count)
while True:
elapsed_time += seconds_per_buffer
if timeout and elapsed_time > timeout:
raise WaitTimeoutError("listening timed out while waiting for hotword to be said")

buffer = source.stream.read(source.CHUNK)
if len(buffer) == 0: break # reached end of the stream
energy = audioop.rms(buffer, source.SAMPLE_WIDTH) # energy of the audio signal
if energy > self.energy_threshold or started:
started = True
frames.append(buffer)

# resample audio to the required sample rate
if source.SAMPLE_RATE != snowboy_sample_rate:
resampled_buffer, resampling_state = audioop.ratecv(buffer, source.SAMPLE_WIDTH, 1,
source.SAMPLE_RATE, snowboy_sample_rate,
resampling_state)
resampled_frames.append(resampled_buffer)
else:
resampled_frames.append(buffer)
frames.append(buffer)

# run Snowboy on the resampled audio
snowboy_result = detector.RunDetection(b"".join(resampled_frames))
assert snowboy_result != -1, "Error initializing streams or reading audio data"
if snowboy_result > 0:
break # wake word found
# resample audio to the required sample rate
resampled_buffer, resampling_state = audioop.ratecv(buffer, source.SAMPLE_WIDTH, 1, source.SAMPLE_RATE, snowboy_sample_rate, resampling_state)
resampled_frames.append(resampled_buffer)

pause_count += 1
if pause_count > pause_buffer_count: # end of the phrase
pause_count = 0
started = False

# dynamically adjust the energy threshold using asymmetric weighted average
if self.dynamic_energy_threshold:
damping = self.dynamic_energy_adjustment_damping ** seconds_per_buffer # account for different chunk sizes and rates
target_energy = energy * self.dynamic_energy_ratio
self.energy_threshold = self.energy_threshold * damping + target_energy * (1 - damping)
# run Snowboy on the resampled audio
snowboy_result = detector.RunDetection(b"".join(resampled_frames))
assert snowboy_result != -1, "Error initializing streams or reading audio data"
if snowboy_result > 0: break # wake word found

return b"".join(frames), elapsed_time

Expand Down

0 comments on commit b013364

Please sign in to comment.