Fix name error in audio tutorial (pytorch#1223)
* Rename speech_command_recognition_with_torchaudio.py to speech_command_recognition_with_torchaudio_tutorial.py

Updated to run the tutorial at build time.

* Update index.rst

* Record in Colab, and also outside it; move pydub to the first install command.

* Replace the multiline string with single-line strings.

* Remove non-Colab support.

* Use the text version of tqdm.

Co-authored-by: Brian Johnson <[email protected]>
Co-authored-by: holly1238 <[email protected]>
3 people authored Apr 26, 2021
1 parent 7835c7d commit 22a8b76
Showing 2 changed files with 44 additions and 43 deletions.
index.rst (5 changes: 2 additions & 3 deletions)
@@ -132,7 +132,7 @@ Welcome to PyTorch Tutorials
   :header: Speech Command Recognition
   :card_description: Learn how to correctly format an audio dataset and then train/test an audio classifier network on the dataset.
   :image: _static/img/thumbnails/cropped/torchaudio-speech.png
-  :link: intermediate/speech_command_recognition_with_torchaudio.html
+  :link: intermediate/speech_command_recognition_with_torchaudio_tutorial.html
   :tags: Audio

.. Text
@@ -578,8 +578,7 @@ Additional Resources
   :caption: Audio

   beginner/audio_preprocessing_tutorial
-  intermediate/speech_command_recognition_with_torchaudio
-
+  intermediate/speech_command_recognition_with_torchaudio_tutorial

.. toctree::
:maxdepth: 2
intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py (82 changes: 42 additions & 40 deletions)
@@ -16,26 +16,25 @@
"""

-# Uncomment the following line to run in Google Colab
+# Uncomment the line corresponding to your "runtime type" to run in Google Colab

# CPU:
-# !pip install torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
+# !pip install pydub torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html

# GPU:
-# !pip install torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
-
-# For interactive demo at the end:
-# !pip install pydub
+# !pip install pydub torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchaudio
+import sys

import matplotlib.pyplot as plt
import IPython.display as ipd
-from tqdm.notebook import tqdm
+
+from tqdm import tqdm


######################################################################
@@ -482,39 +481,40 @@ def predict(tensor):
# will record one second of audio and try to classify it.
#

-from google.colab import output as colab_output
-from base64 import b64decode
-from io import BytesIO
-from pydub import AudioSegment
-
-
-RECORD = """
-const sleep = time => new Promise(resolve => setTimeout(resolve, time))
-const b2text = blob => new Promise(resolve => {
-  const reader = new FileReader()
-  reader.onloadend = e => resolve(e.srcElement.result)
-  reader.readAsDataURL(blob)
-})
-var record = time => new Promise(async resolve => {
-  stream = await navigator.mediaDevices.getUserMedia({ audio: true })
-  recorder = new MediaRecorder(stream)
-  chunks = []
-  recorder.ondataavailable = e => chunks.push(e.data)
-  recorder.start()
-  await sleep(time)
-  recorder.onstop = async ()=>{
-    blob = new Blob(chunks)
-    text = await b2text(blob)
-    resolve(text)
-  }
-  recorder.stop()
-})
-"""
-
-
 def record(seconds=1):
-    display(ipd.Javascript(RECORD))
+
+    from google.colab import output as colab_output
+    from base64 import b64decode
+    from io import BytesIO
+    from pydub import AudioSegment
+
+    RECORD = (
+        b"const sleep = time => new Promise(resolve => setTimeout(resolve, time))\n"
+        b"const b2text = blob => new Promise(resolve => {\n"
+        b"  const reader = new FileReader()\n"
+        b"  reader.onloadend = e => resolve(e.srcElement.result)\n"
+        b"  reader.readAsDataURL(blob)\n"
+        b"})\n"
+        b"var record = time => new Promise(async resolve => {\n"
+        b"  stream = await navigator.mediaDevices.getUserMedia({ audio: true })\n"
+        b"  recorder = new MediaRecorder(stream)\n"
+        b"  chunks = []\n"
+        b"  recorder.ondataavailable = e => chunks.push(e.data)\n"
+        b"  recorder.start()\n"
+        b"  await sleep(time)\n"
+        b"  recorder.onstop = async ()=>{\n"
+        b"    blob = new Blob(chunks)\n"
+        b"    text = await b2text(blob)\n"
+        b"    resolve(text)\n"
+        b"  }\n"
+        b"  recorder.stop()\n"
+        b"})"
+    )
+    RECORD = RECORD.decode("ascii")
+
+    print(f"Recording started for {seconds} seconds.")
+    display(ipd.Javascript(RECORD))
     s = colab_output.eval_js("record(%d)" % (seconds * 1000))
     print("Recording ended.")
     b = b64decode(s.split(",")[1])
@@ -525,9 +525,11 @@ def record(seconds=1):
     return torchaudio.load(filename)


-waveform, sample_rate = record()
-print(f"Predicted: {predict(waveform)}.")
-ipd.Audio(waveform.numpy(), rate=sample_rate)
+# Detect whether notebook runs in google colab
+if "google.colab" in sys.modules:
+    waveform, sample_rate = record()
+    print(f"Predicted: {predict(waveform)}.")
+    ipd.Audio(waveform.numpy(), rate=sample_rate)


######################################################################
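For readers skimming the diff: the lines collapsed between b64decode and torchaudio.load presumably re-export the browser recording as WAV with pydub before loading it, and the new sys.modules check keeps the interactive demo Colab-only. Below is a rough, self-contained sketch of those two pieces, assuming pydub and torchaudio are installed; the helper name, filename, and variable names are illustrative, not the commit's code.

import sys
from base64 import b64decode
from io import BytesIO

import torchaudio
from pydub import AudioSegment  # installed by the pip command at the top of the tutorial


def data_url_to_waveform(data_url, filename="_audio.wav"):
    """Decode a base64 data URL from the browser into (waveform, sample_rate)."""
    raw = b64decode(data_url.split(",")[1])  # drop the "data:...;base64," header
    # Let pydub parse whatever container the browser recorded and re-export as WAV.
    AudioSegment.from_file(BytesIO(raw)).export(filename, format="wav")
    # Load the WAV as a (Tensor, int) pair for the classifier.
    return torchaudio.load(filename)


# Same guard as the commit: google.colab is only importable inside Colab,
# so the interactive recording demo is skipped everywhere else.
if "google.colab" in sys.modules:
    print("Running in Colab; the recording demo will execute.")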
