-
Notifications
You must be signed in to change notification settings - Fork 0
/
transcribe.py
85 lines (64 loc) · 3.34 KB
/
transcribe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from openai import OpenAI
from pydub import AudioSegment
import os
import shutil
from post_processing import post_processing
import argparse # Import argparse for command line arguments
# Module-level OpenAI API client used by transcribe_audio(). It is constructed
# without explicit arguments, so credentials presumably come from the
# environment -- TODO confirm against the deployment setup.
client = OpenAI()
# Transcribe a single audio file with the OpenAI "whisper-1" model
def transcribe_audio(audio_file, output_dir):
    """Transcribe one audio file with the ``whisper-1`` model.

    Args:
        audio_file: Name of the audio file, relative to ``output_dir``.
        output_dir: Directory that contains ``audio_file``.

    Returns:
        The ``text`` attribute of the transcription response.
    """
    audio_path = os.path.join(output_dir, audio_file)
    # Python f-strings interpolate with bare braces; a "$" prefix would be
    # printed literally in front of the path.
    print(f"Start transcribing {audio_path}")
    # The file is opened in binary mode and handed to the API client as-is.
    with open(audio_path, 'rb') as audio_data:
        transcription = client.audio.transcriptions.create(
            model="whisper-1", file=audio_data)
    return transcription.text
# use PyDub to cut audio in smaller chunks
def segment_audio(audio_file, output_dir, duration):
    """Split an audio object into consecutive chunks and export each as MP3.

    ``output_dir`` is recreated from scratch: if it already exists, it is
    deleted together with everything in it before the chunks are written.

    Args:
        audio_file: Sliceable audio object that supports ``len()`` and whose
            slices provide ``export(path, format=...)``. The script passes a
            pydub ``AudioSegment`` here (despite the parameter name, this is
            not a file name).
        output_dir: Directory that receives the chunk files.
        duration: Chunk length, in the same unit as ``len(audio_file)``
            (the script passes milliseconds). Must be positive.

    Returns:
        The chunk file names (base names, not full paths) in playback order.

    Raises:
        ValueError: If ``duration`` is not positive.
    """
    # Validate before touching the file system: a non-positive step would
    # never advance past the end of the audio, and the existing output
    # directory would already have been wiped by then.
    if duration <= 0:
        raise ValueError(f"duration must be positive, got {duration!r}")

    print("Start segmenting audio...")
    if os.path.isdir(output_dir):
        print("Directory already exists. Deleting...")
        shutil.rmtree(output_dir)  # Delete output directory and all its contents
    print("Creating new directory...")
    os.makedirs(output_dir)

    total_length = len(audio_file)
    audio_files = []
    start_time = 0
    i = 0
    while start_time < total_length:
        print(f"Segementing audio {i+1}")
        file_name = f"segemented_{i:02d}.mp3"
        segment = audio_file[start_time:start_time + duration]
        segment.export(os.path.join(output_dir, file_name), format="mp3")
        # Record names in export order instead of re-listing the directory
        # and sorting on digits parsed back out of the file names.
        audio_files.append(file_name)
        start_time += duration
        i += 1
    print("Finished segmenting audio.")
    return audio_files
# Parse command line arguments
def parse_args():
    """Read the script's command line options.

    Returns:
        An ``argparse.Namespace`` with the attributes ``file`` and
        ``output``; both options are mandatory.
    """
    cli = argparse.ArgumentParser(description="Transcribe audio files.")
    # (flag, help text) for every mandatory option, registered in order.
    mandatory_options = (
        ('--file', 'Path to the audio file to transcribe'),
        ('--output', 'Directory to save the output files'),
    )
    for flag, help_text in mandatory_options:
        cli.add_argument(flag, required=True, help=help_text)
    return cli.parse_args()
if __name__ == "__main__":
    # Script entry point: split the input recording into segments, transcribe
    # each segment, write the joined transcript to disk and pass it on to
    # post_processing().
    args = parse_args()
    interview_audio = args.file  # path of the recording to transcribe
    output_dir = args.output  # parent directory for all generated files

    # Load the original file.
    # NOTE(review): the format is hard-coded to "mp4"; confirm whether inputs
    # in other formats need to be supported.
    interview = AudioSegment.from_file(interview_audio, "mp4")

    # Everything produced for this recording goes into a subdirectory named
    # after the input file (base name without extension).
    base_name = os.path.splitext(os.path.basename(interview_audio))[0]
    print('The base name is: '+base_name)
    output_dir = os.path.join(output_dir, base_name)
    # Bare braces interpolate in an f-string; a "$" prefix would be printed
    # literally in front of the directory.
    print(f"The output dir is {output_dir}")

    ten_minutes = 10 * 60 * 1000  # segment length: ten minutes in milliseconds
    interview_files = segment_audio(interview, output_dir, ten_minutes)
    transcriptions = [transcribe_audio(file, output_dir) for file in interview_files]

    # Concatenate the per-segment transcriptions, separated by single spaces.
    full_transcript = ' '.join(transcriptions)

    # Write the full transcript into the output directory.
    full_transcript_file_path = os.path.join(output_dir, 'full_transcript.txt')
    with open(full_transcript_file_path, 'w', encoding='utf-8') as f:
        f.write(full_transcript)

    # Hand the saved transcript to the post-processing step.
    post_processing(output_dir, full_transcript_file_path)