# test_live.py
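"""Live wake-word detection demo.

Records 1-second windows from the default microphone with sounddevice,
extracts MFCC features with librosa, and classifies each window with a
trained Keras model (wake_word_model.h5).

Requires: tensorflow, librosa, sounddevice, numpy.
Run with: python test_live.py
"""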
import numpy as np
import sounddevice as sd
import librosa
from tensorflow.keras.models import load_model
# Load the trained model (assumed to come from a prior training run that
# used the same N_MFCC and SAMPLING_RATE settings)
model = load_model("wake_word_model.h5")
# Feature extraction parameters
N_MFCC = 13
SAMPLING_RATE = 16000  # Hz; adjust to match the rate used during training
N_FRAMES = 32  # MFCC frames per window expected by the model's Conv1D input
def extract_features(audio):
    # Extract MFCC features from the audio
    mfcc = librosa.feature.mfcc(y=audio, sr=SAMPLING_RATE, n_mfcc=N_MFCC)
    return mfcc.T  # Transpose to (frames, n_mfcc) to match the model input
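
# Note: for a 1-second clip at 16 kHz, librosa's default hop length of 512
# yields 1 + 16000 // 512 = 32 MFCC frames, which matches N_FRAMES above;
# the padding in detect_wake_word covers any shorter recordings.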
def detect_wake_word():
    print("Listening for wake word...")
    while True:
        # Record 1 second of mono audio from the default input device
        audio = sd.rec(int(SAMPLING_RATE), samplerate=SAMPLING_RATE,
                       channels=1, dtype='float32')
        sd.wait()

        # Extract MFCC features from the recorded audio
        features = extract_features(audio.flatten())

        # Pad or truncate to the fixed length the Conv1D input expects
        if features.shape[0] < N_FRAMES:
            features = np.pad(features, ((0, N_FRAMES - features.shape[0]), (0, 0)),
                              mode='constant')
        else:
            features = features[:N_FRAMES]

        # Add the batch dimension. Conv1D expects 3-D input
        # (batch, steps, channels) = (1, N_FRAMES, N_MFCC), so no extra
        # channel axis is added.
        features = np.expand_dims(features, axis=0)

        # Predict using the trained model
        prediction = model.predict(features, verbose=0)
        predicted_class = np.argmax(prediction, axis=1)[0]

        # Output result (class 0 is assumed to be the wake word)
        if predicted_class == 0:
            print("Wake word detected!")
        else:
            print("No wake word detected.")
if __name__ == "__main__":
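    # Sanity check (assumption: the model was trained on (N_FRAMES, N_MFCC)
    # MFCC windows): print the expected input shape before listening.
    print(f"Model expects input shape: {model.input_shape}")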
    detect_wake_word()