Skip to content

Commit

Permalink
Merge branch 'secretsauceai:dev' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
skewballfox authored Jan 10, 2022
2 parents 5cb7bfc + 022a758 commit cfcd1a5
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 4 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ __pycache__/
*.pb
*.params
*.net
*.json
*.pbtxt
*.wav

Expand Down
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# wakeword-data-prep
# Precise Wakeword Model Maker
This is a work in progress!

After collecting your wake word data set with the [wakeword data collection tool](https://github.com/AmateurAcademic/wakeword-recorder-py), you can use this tool to:
Expand Down Expand Up @@ -45,8 +45,9 @@ This is still a work in progress.
* ~~adding noise~~
* ~~Gaussian noise~~
* ~~background noise (precise-add-noise)~~
* Refactor model analytics and choosing the best model
* Refactor the training function to pass both measures: the default `loss` and `val_loss`
* ~~Refactor model analytics and choosing the best model~~
* ~~Refactor the training function to pass both measures: the default `loss` and `val_loss`~~
* Test when, and after how many epochs, to switch to `val_loss` (this prevents overfitting!)
* Test smaller batch sizes and scaling them up
* test output models (both tf1.13 and tflite) for production
* hope this one passes
Expand Down
37 changes: 37 additions & 0 deletions data_prep_system_configuration.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"random_split_directories": [
"wake-word/",
"not-wake-word/background/"
],
"even_odd_split_directories": [
"wake-word/variations/"
],
"three_four_split_directories": [
"not-wake-word/parts/"
],
"root_model_name": "experiment",
"source_directories": [
"background_noise/wake-word/",
"background_noise/wake-word/variations/",
"background_noise/test/wake-word/",
"background_noise/test/wake-word/variations/"
],
"destination_directories": [
"wake-word/background_noise/",
"wake-word/background_noise/variations/",
"test/wake-word/background_noise/",
"test/wake-word/background_noise/variations/"
],

"directories_to_gauss": [
"/wake-word/",
"/wake-word/variations/",
"/not-wake-word/background/",
"/not-wake-word/parts/",
"/test/wake-word/",
"/test/wake-word/variations/",
"/test/not-wake-word/background/",
"/test/not-wake-word/parts/"
]

}
14 changes: 14 additions & 0 deletions data_prep_user_configuration.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{ "audio_source_directory": "flow_test_delete_after/",
"wakeword_model_name": "test_wakeword_model_delete_after",
"pdsounds_directory": "audio/flow_test_delete_after/pdsounds_march2009/mp3/",
"extra_audio_directories_to_process": [
"audio/noises/",
"audio/common_voice/"
],
"extra_audio_directories_labels": [
"non-utterances",
"utterances"
],
"max_files_from_source_directory": 45000,
"max_files_per_destination_directory": 10000
}
97 changes: 97 additions & 0 deletions dialog.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
[ {
"dialog_name": "main_menu_optional_dialog",
"dialog_description": "optional dialog to be prompted if the user hasn't filled in the config.json file",
"dialog_content": [
{
"text": "Please enter the relative path to the wakeword recordings directory (e.g. audio/):\n",
"dialog_type": "input-string-wakeword_recordings_directory"
},
{
"text": "Please enter the name you want to give the wakeword model (e.g. 'wakeword_model'):\n",
"dialog_type": "input-string-model_name"
}
]
},
{
"dialog_name": "main_menu_dialog",
"dialog_description": "main menu dialog to choose data prep options",
"dialog_content": [
{
"text": "Please enter your choice\n1. Create base model from wakeword recorder data\n2. Create improved model by generating extra data \n3. Further improve the model by processing and generating more data with more audio directories\n4. Do it all and exit\n5. Exit\n\n",
"dialog_type": "input-numbered-main_choice"

}
]
},
{

"dialog_name": "base_model_menu_dialog",
"dialog_description": "generating the base model menu dialog",
"dialog_content": [
{

"text":"Splitting the data from {source_directory}",
"dialog_type": "inform-splitting_data"
},

{
"text": "Running experimental training to find the optimal test-train split..",
"dialog_type": "inform-experiment_train_test_split"
},

{
"text": "Average test set accuracy: {average_val_acc} \u00B1 {standard_deviation_val_acc}\nAverage train set accuracy: {average_acc} \u00B1 {standard_deviation_acc}",
"dialog_type": "inform-accuracy"
},

{
"text": "{selected_model_name} produces the best results with {selected_model_results}",
"dialog_type": "inform-best_model"
},

{
"text": "Starting incremental training on {random_user_recordings_directory}",
"dialog_type": "inform-incremental_training_start"
},

{
"text": "Incremental training on {random_user_recordings_directory} complete",
"dialog_type": "inform-incremental_training_complete"
},

{
"text": "training {wakeword_model_name} with the new data..",
"dialog_type": "inform-training_start"
},

{
"text": "{wakeword_model_name} training complete",
"dialog_type": "inform-training_complete"
},

{
"text": "changed {selected_model_name} to {wakeword_model_name}",
"dialog_type": "inform-changed_model_name"
},

{
"text": "Average accuracies of the base model...",
"dialog_type": "inform-base_model_accuracies"
},

{
"text": "Original best model: \n{selected_model_name}: {selected_model_results}\n dataset size: {dataset_size}",
"dialog_type": "inform-original_best_base_model_results"
},

{
"text": "Try the model by running 'precise-listen {wakeword_model_name}.net' in your command line. Choose option 2 in the main menu to continue generating data and bring {wakeword_model_name} to production quality",
"dialog_type": "inform-continue"
}



]
}

]

0 comments on commit cfcd1a5

Please sign in to comment.