-
Notifications
You must be signed in to change notification settings - Fork 4
/
Google_Longaudio_API.py
96 lines (73 loc) · 2.97 KB
/
Google_Longaudio_API.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
filepath = "audio_wav/"
output_filepath = "Transcripts/"
#bucketname='speech2textaudio'
from pydub import AudioSegment
import io
import os
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
import wave
from google.cloud import storage
def mp3_to_wav(audio_file_name):
if audio_file_name.split('.')[1] == 'mp3':
sound = AudioSegment.from_mp3(audio_file_name)
audio_file_name = audio_file_name.split('.')[0] + '.wav'
sound.export(audio_file_name, format="wav")
def frame_rate_channel(audio_file_name):
with wave.open(audio_file_name, "rb") as wave_file:
frame_rate = wave_file.getframerate()
print("\n\n** FILE CONVERSION SUCCESSFUL **")
channels = wave_file.getnchannels()
return frame_rate,channels
def stereo_to_mono(audio_file_name):
sound = AudioSegment.from_wav(audio_file_name)
sound = sound.set_channels(1)
sound.export(audio_file_name, format="wav")
def upload_blob(bucket_name, source_file_name, destination_blob_name):
"""Uploads a file to the bucket."""
storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)
blob = bucket.blob(destination_blob_name)
blob.upload_from_filename(source_file_name)
def delete_blob(bucket_name, blob_name):
"""Deletes a blob from the bucket."""
storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)
blob = bucket.blob(blob_name)
blob.delete()
def google_transcribe(audio_file_name):
file_name = filepath + audio_file_name
mp3_to_wav(file_name)
# The name of the audio file to transcribe
frame_rate, channels = frame_rate_channel(file_name)
if channels > 1:
stereo_to_mono(file_name)
bucket_name = 'speech2textaudio'
source_file_name = filepath + audio_file_name
destination_blob_name = audio_file_name
upload_blob(bucket_name, source_file_name, destination_blob_name)
gcs_uri = 'gs://speech2textaudio/' + audio_file_name
transcript = ''
client = speech.SpeechClient()
audio = types.RecognitionAudio(uri=gcs_uri)
config = types.RecognitionConfig(
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=frame_rate,
language_code='en-US')
# Detects speech in the audio file
operation = client.long_running_recognize(config, audio)
response = operation.result(timeout=10000)
for result in response.results:
transcript += result.alternatives[0].transcript
delete_blob(bucket_name, destination_blob_name)
return transcript
def write_transcripts(transcript_filename,transcript):
f= open(output_filepath + transcript_filename,"w+")
f.write(transcript)
f.close()
if __name__ == "__main__":
for audio_file_name in os.listdir(filepath):
transcript = google_transcribe(audio_file_name)
transcript_filename = audio_file_name.split('.')[0] + '.txt'
write_transcripts(transcript_filename,transcript)