Fix name error in audio tutorial (pytorch#1223)
* Rename speech_command_recognition_with_torchaudio.py to speech_command_recognition_with_torchaudio_tutorial.py

Updated to run the tutorial at build time.

* Update index.rst

* Record in Colab, and also outside it; move pydub to the first install command.

* Replace the multiline string with single-line strings.

* Remove non-Colab support.

* Use the text version of tqdm.

Co-authored-by: Brian Johnson <[email protected]>
Co-authored-by: holly1238 <[email protected]>
3 people authored Apr 26, 2021
1 parent 7835c7d commit 22a8b76
Showing 2 changed files with 44 additions and 43 deletions.
index.rst (5 changes: 2 additions & 3 deletions)
@@ -132,7 +132,7 @@ Welcome to PyTorch Tutorials
   :header: Speech Command Recognition
   :card_description: Learn how to correctly format an audio dataset and then train/test an audio classifier network on the dataset.
   :image: _static/img/thumbnails/cropped/torchaudio-speech.png
-  :link: intermediate/speech_command_recognition_with_torchaudio.html
+  :link: intermediate/speech_command_recognition_with_torchaudio_tutorial.html
   :tags: Audio

.. Text
@@ -578,8 +578,7 @@ Additional Resources
   :caption: Audio

   beginner/audio_preprocessing_tutorial
-  intermediate/speech_command_recognition_with_torchaudio
-
+  intermediate/speech_command_recognition_with_torchaudio_tutorial

.. toctree::
:maxdepth: 2
intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py (82 changes: 42 additions & 40 deletions)
@@ -16,26 +16,25 @@
"""

-# Uncomment the following line to run in Google Colab
+# Uncomment the line corresponding to your "runtime type" to run in Google Colab

# CPU:
-# !pip install torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
+# !pip install pydub torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html

# GPU:
-# !pip install torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
-
-# For interactive demo at the end:
-# !pip install pydub
+# !pip install pydub torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchaudio
+import sys

import matplotlib.pyplot as plt
import IPython.display as ipd
-from tqdm.notebook import tqdm
+
+from tqdm import tqdm


######################################################################
@@ -482,39 +481,40 @@ def predict(tensor):
# will record one second of audio and try to classify it.
#

-from google.colab import output as colab_output
-from base64 import b64decode
-from io import BytesIO
-from pydub import AudioSegment
-
-
-RECORD = """
-const sleep = time => new Promise(resolve => setTimeout(resolve, time))
-const b2text = blob => new Promise(resolve => {
-  const reader = new FileReader()
-  reader.onloadend = e => resolve(e.srcElement.result)
-  reader.readAsDataURL(blob)
-})
-var record = time => new Promise(async resolve => {
-  stream = await navigator.mediaDevices.getUserMedia({ audio: true })
-  recorder = new MediaRecorder(stream)
-  chunks = []
-  recorder.ondataavailable = e => chunks.push(e.data)
-  recorder.start()
-  await sleep(time)
-  recorder.onstop = async ()=>{
-    blob = new Blob(chunks)
-    text = await b2text(blob)
-    resolve(text)
-  }
-  recorder.stop()
-})
-"""
-
-
 def record(seconds=1):
-    display(ipd.Javascript(RECORD))
+
+    from google.colab import output as colab_output
+    from base64 import b64decode
+    from io import BytesIO
+    from pydub import AudioSegment
+
+    RECORD = (
+        b"const sleep = time => new Promise(resolve => setTimeout(resolve, time))\n"
+        b"const b2text = blob => new Promise(resolve => {\n"
+        b"  const reader = new FileReader()\n"
+        b"  reader.onloadend = e => resolve(e.srcElement.result)\n"
+        b"  reader.readAsDataURL(blob)\n"
+        b"})\n"
+        b"var record = time => new Promise(async resolve => {\n"
+        b"  stream = await navigator.mediaDevices.getUserMedia({ audio: true })\n"
+        b"  recorder = new MediaRecorder(stream)\n"
+        b"  chunks = []\n"
+        b"  recorder.ondataavailable = e => chunks.push(e.data)\n"
+        b"  recorder.start()\n"
+        b"  await sleep(time)\n"
+        b"  recorder.onstop = async ()=>{\n"
+        b"    blob = new Blob(chunks)\n"
+        b"    text = await b2text(blob)\n"
+        b"    resolve(text)\n"
+        b"  }\n"
+        b"  recorder.stop()\n"
+        b"})"
+    )
+    RECORD = RECORD.decode("ascii")
+
+    print(f"Recording started for {seconds} seconds.")
+    display(ipd.Javascript(RECORD))
     s = colab_output.eval_js("record(%d)" % (seconds * 1000))
     print("Recording ended.")
     b = b64decode(s.split(",")[1])
@@ -525,9 +525,11 @@ def record(seconds=1):
     return torchaudio.load(filename)


-waveform, sample_rate = record()
-print(f"Predicted: {predict(waveform)}.")
-ipd.Audio(waveform.numpy(), rate=sample_rate)
+# Detect whether notebook runs in google colab
+if "google.colab" in sys.modules:
+    waveform, sample_rate = record()
+    print(f"Predicted: {predict(waveform)}.")
+    ipd.Audio(waveform.numpy(), rate=sample_rate)


######################################################################
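For readers skimming the diff: the lines collapsed between b64decode and torchaudio.load presumably re-export the browser recording as WAV with pydub before loading it, and the new sys.modules check keeps the interactive demo Colab-only. Below is a rough, self-contained sketch of those two pieces, assuming pydub and torchaudio are installed; the helper name, filename, and variable names are illustrative, not the commit's code.

import sys
from base64 import b64decode
from io import BytesIO

import torchaudio
from pydub import AudioSegment  # installed by the pip command at the top of the tutorial


def data_url_to_waveform(data_url, filename="_audio.wav"):
    """Decode a base64 data URL from the browser into (waveform, sample_rate)."""
    raw = b64decode(data_url.split(",")[1])  # drop the "data:...;base64," header
    # Let pydub parse whatever container the browser recorded and re-export as WAV.
    AudioSegment.from_file(BytesIO(raw)).export(filename, format="wav")
    # Load the WAV as a (Tensor, int) pair for the classifier.
    return torchaudio.load(filename)


# Same guard as the commit: google.colab is only importable inside Colab,
# so the interactive recording demo is skipped everywhere else.
if "google.colab" in sys.modules:
    print("Running in Colab; the recording demo will execute.")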
