snowboy only decodes two seconds per run

This helps RPi to catch up with real-time.
embie27 · Dec 6, 2018 · b013364 · b013364
1 parent f23a4d5
commit b013364
Showing 1 changed file with 11 additions and 35 deletions.
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
@@ -591,50 +591,26 @@ def snowboy_wait_for_hot_word(self, snowboy_location, snowboy_hot_word_files, so
         resampling_state = None
 
         # buffers capable of holding 5 seconds of original and resampled audio
-        five_seconds_buffer_count = int(math.ceil(5 / seconds_per_buffer))
-        frames = collections.deque(maxlen=five_seconds_buffer_count)
-        resampled_frames = collections.deque(maxlen=five_seconds_buffer_count)
-        started = False
-        pause_count = 0
-        pause_buffer_count = int(math.ceil(
-        self.pause_threshold / seconds_per_buffer))  # number of buffers of non-speaking audio during a phrase, before the phrase should be considered complete
+        two_seconds_buffer_count = int(math.ceil(2 / seconds_per_buffer))
+        frames = collections.deque(maxlen=two_seconds_buffer_count)
+        resampled_frames = collections.deque(maxlen=two_seconds_buffer_count)
         while True:
             elapsed_time += seconds_per_buffer
             if timeout and elapsed_time > timeout:
                 raise WaitTimeoutError("listening timed out while waiting for hotword to be said")
 
             buffer = source.stream.read(source.CHUNK)
             if len(buffer) == 0: break  # reached end of the stream
-            energy = audioop.rms(buffer, source.SAMPLE_WIDTH)  # energy of the audio signal
-            if energy > self.energy_threshold or started:
-                started = True
-                frames.append(buffer)
-
-                # resample audio to the required sample rate
-                if source.SAMPLE_RATE != snowboy_sample_rate:
-                    resampled_buffer, resampling_state = audioop.ratecv(buffer, source.SAMPLE_WIDTH, 1,
-                                                                        source.SAMPLE_RATE, snowboy_sample_rate,
-                                                                        resampling_state)
-                    resampled_frames.append(resampled_buffer)
-                else:
-                    resampled_frames.append(buffer)
+            frames.append(buffer)
 
-                # run Snowboy on the resampled audio
-                snowboy_result = detector.RunDetection(b"".join(resampled_frames))
-                assert snowboy_result != -1, "Error initializing streams or reading audio data"
-                if snowboy_result > 0:
-                    break  # wake word found
+            # resample audio to the required sample rate
+            resampled_buffer, resampling_state = audioop.ratecv(buffer, source.SAMPLE_WIDTH, 1, source.SAMPLE_RATE, snowboy_sample_rate, resampling_state)
+            resampled_frames.append(resampled_buffer)
 
-                pause_count += 1
-                if pause_count > pause_buffer_count:  # end of the phrase
-                    pause_count = 0
-                    started = False
-
-            # dynamically adjust the energy threshold using asymmetric weighted average
-            if self.dynamic_energy_threshold:
-                damping = self.dynamic_energy_adjustment_damping ** seconds_per_buffer  # account for different chunk sizes and rates
-                target_energy = energy * self.dynamic_energy_ratio
-                self.energy_threshold = self.energy_threshold * damping + target_energy * (1 - damping)
+            # run Snowboy on the resampled audio
+            snowboy_result = detector.RunDetection(b"".join(resampled_frames))
+            assert snowboy_result != -1, "Error initializing streams or reading audio data"
+            if snowboy_result > 0: break  # wake word found
 
         return b"".join(frames), elapsed_time