Merge branch 'secretsauceai:dev' into dev

secretsauceai · Jan 10, 2022 · cfcd1a5 · cfcd1a5
2 parents 5cb7bfc + 022a758
commit cfcd1a5
Show file tree

Hide file tree

Showing 5 changed files with 152 additions and 4 deletions.
diff --git a/.gitignore b/.gitignore
@@ -15,7 +15,6 @@ __pycache__/
 *.pb
 *.params
 *.net
-*.json
 *.pbtxt
 *.wav
 

diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-# wakeword-data-prep
+# Precise Wakeword Model Maker
 This is a work in progress! 
 
 After collecting your wake word data set with the [wakeword data collection tool](https://github.com/AmateurAcademic/wakeword-recorder-py), you can use this tool to:
@@ -45,8 +45,9 @@ This is still a work in progress.
 * ~~adding noise~~
     * ~~Gaussian noise~~
     * ~~background noise (precise-add-noise)~~
-* Refactor model analytics and choosing the best model
-* Refactor the training function to pass both measures: the default `loss` and `val_loss`
+* ~~Refactor model analytics and choosing the best model~~
+* ~~Refactor the training function to pass both measures: the default `loss` and `val_loss`~~
+* Test when (i.e. after how many epochs) to switch to `val_loss` (this prevents overfitting!)
 * Test smaller batch sizes and scaling them up
 * test output models (both tf1.13 and tflite) for production
    * hope this one passes 

diff --git a/data_prep_system_configuration.json b/data_prep_system_configuration.json
@@ -0,0 +1,37 @@
+{
+    "random_split_directories": [
+        "wake-word/",
+        "not-wake-word/background/"
+        ],
+    "even_odd_split_directories": [
+        "wake-word/variations/"
+        ],
+    "three_four_split_directories": [
+        "not-wake-word/parts/"
+    ],
+    "root_model_name": "experiment",
+    "source_directories": [
+        "background_noise/wake-word/",
+        "background_noise/wake-word/variations/",
+        "background_noise/test/wake-word/",
+        "background_noise/test/wake-word/variations/"
+        ],
+    "destination_directories": [
+        "wake-word/background_noise/",
+        "wake-word/background_noise/variations/",
+        "test/wake-word/background_noise/",
+        "test/wake-word/background_noise/variations/"
+        ],
+
+    "directories_to_gauss": [
+        "/wake-word/",
+        "/wake-word/variations/",
+        "/not-wake-word/background/",
+        "/not-wake-word/parts/",
+        "/test/wake-word/",
+        "/test/wake-word/variations/",
+        "/test/not-wake-word/background/",
+        "/test/not-wake-word/parts/"
+        ]
+
+}
diff --git a/data_prep_user_configuration.json b/data_prep_user_configuration.json
@@ -0,0 +1,14 @@
+{   "audio_source_directory": "flow_test_delete_after/",
+    "wakeword_model_name": "test_wakeword_model_delete_after",
+    "pdsounds_directory": "audio/flow_test_delete_after/pdsounds_march2009/mp3/",
+    "extra_audio_directories_to_process": [ 
+        "audio/noises/",
+        "audio/common_voice/"
+    ],
+    "extra_audio_directories_labels": [
+        "non-utterances",
+        "utterances"
+    ],
+    "max_files_from_source_directory": 45000,
+    "max_files_per_destination_directory": 10000
+}
diff --git a/dialog.json b/dialog.json
@@ -0,0 +1,97 @@
+[ {
+    "dialog_name": "main_menu_optional_dialog",
+    "dialog_description": "optional dialog to be prompted if the user hasn't filled in the config.json file",
+    "dialog_content": [
+        {
+            "text": "Please enter the relative path to the wakeword recordings directory (e.g. audio/):\n",
+            "dialog_type": "input-string-wakeword_recordings_directory"
+        },
+        {
+            "text": "Please enter the name you want to give the wakeword model (e.g. 'wakeword_model'):\n",
+            "dialog_type": "input-string-model_name"
+        }
+    ]
+},
+    {
+        "dialog_name": "main_menu_dialog",
+        "dialog_description": "main menu dialog to choose data prep options",
+        "dialog_content": [
+            {
+                "text": "Please enter your choice\n1. Create base model from wakeword recorder data\n2. Create improved model by generating extra data \n3. Further improve the model by processing and generating more data with more audio directories\n4. Do it all and exit\n5. Exit\n\n",
+                "dialog_type": "input-numbered-main_choice"
+
+            }
+        ]
+},
+    {
+
+        "dialog_name": "base_model_menu_dialog",
+        "dialog_description": "generating the base model menu dialog",
+        "dialog_content": [
+            {
+
+                "text":"Splitting the data from {source_directory}",
+                "dialog_type": "inform-splitting_data"
+            },
+
+            {
+                "text": "Running experimental training to find the optimal test-train split..",
+                "dialog_type": "inform-experiment_train_test_split"
+            }, 
+
+            {
+                "text": "Average test set accuracy: {average_val_acc} \u00B1 {standard_deviation_val_acc}\nAverage train set accuracy: {average_acc} \u00B1 {standard_deviation_acc}",
+                "dialog_type": "inform-accuracy"
+            },
+
+            {
+                "text": "{selected_model_name} produces the best results with {selected_model_results}",
+                "dialog_type": "inform-best_model"
+            },
+
+            {
+                "text": "Starting incremental training on {random_user_recordings_directory}",
+                "dialog_type": "inform-incremental_training_start"
+            },
+
+            {
+                "text": "Incremental training on {random_user_recordings_directory} complete",
+                "dialog_type": "inform-incremental_training_complete"
+            },
+
+            {
+                "text": "training {wakeword_model_name} with the new data..",
+                "dialog_type": "inform-training_start"
+            },
+
+            {
+                "text": "{wakeword_model_name} training complete",
+                "dialog_type": "inform-training_complete"
+            },
+
+            {
+                "text": "changed {selected_model_name} to {wakeword_model_name}",
+                "dialog_type": "inform-changed_model_name"
+            },
+
+            {
+                "text": "Average accuracies of the base model...",
+                "dialog_type": "inform-base_model_accuracies"
+            },
+
+            {
+                "text": "Original best model: \n{selected_model_name}: {selected_model_results}\n dataset size: {dataset_size}",
+                "dialog_type": "inform-original_best_base_model_results"
+            },
+
+            {
+                "text": "Try the model by running 'precise-listen {wakeword_model_name}.net' in your command line. Choose option 2 to continue generating {wakeword_model_name} data to production quality",
+                "dialog_type": "inform-continue"
+            }
+
+
+
+        ]
+    }
+
+]
-Original file line number
+Diff line change
@@ Expand Up / @@ -15,7 +15,6 @@ __pycache__/ @@
     *.pb
     *.params
     *.net
-    *.json
     *.pbtxt
     *.wav
@@ Expand Down @@