-
Notifications
You must be signed in to change notification settings - Fork 0
/
document2audio.sh
147 lines (127 loc) · 5.19 KB
/
document2audio.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# Script Name: Document2Audio.sh
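# This script converts a PDF document to per-page audio files (PDF -> PNG -> OCR text -> macOS "say")
# and writes an M3U playlist, named after the PDF, that references those audio files.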
# Prompt for Cursor 0.42.3:
# Create a robust and modular shell script to convert either a flattened or
# unflattened PDF into an enumeration of speech synthesized files cut by the section
# of the PDF with that being the title and saved in the OPUS file format and delivered
# to a VLC via the IP Address of the server as the command line argument along with
# using "say" command in macOS for the speech synthesis and remove all extraneous
# characters, charts, and pictures inside the PDF. Please convert the PDF into text,
# then into cut by section of the PDF by the header title as the filename title for the
# opus audio file, and then convert the text into audio using say.
# Engine used: gpt-4o
# Second Prompt for Cursor 0.42.3:
# Instead of streaming, could you instead add the files to a playlist that is the same as
# the filename of the PDF inside the VLC on the computer?
# Engine used: gpt-4o
#!/bin/bash
# ---------------------------------------------------------------------------
# Argument handling and output locations.
#   $1         path to the PDF to convert (required)
#   --verbose  optional; when set, ImageMagick is run without -quiet
# Exits with status 1 on a usage error, an unreadable PDF, a missing external
# tool, or when the output directories cannot be created.
# ---------------------------------------------------------------------------

# Check if the correct number of arguments is provided
if [ "$#" -lt 1 ]; then
  # Usage errors go to stderr so they are never mistaken for normal output.
  echo "Usage: $0 <PDF_FILE> [--verbose]" >&2
  exit 1
fi

PDF_FILE="$1"
VERBOSE=false
MAX_CONCURRENT_JOBS=4

# Fail early on a bad path instead of letting every later stage fail in turn.
if [ ! -f "$PDF_FILE" ] || [ ! -r "$PDF_FILE" ]; then
  echo "Error: cannot read PDF file '$PDF_FILE'" >&2
  exit 1
fi

# Check for the verbose flag. It is accepted in any position after the PDF
# path; anything else is reported rather than silently ignored.
for arg in "${@:2}"; do
  if [ "$arg" = "--verbose" ]; then
    VERBOSE=true
  else
    echo "Warning: ignoring unknown argument '$arg'" >&2
  fi
done

# The pipeline shells out to these tools; verify them up front so a missing
# one is reported before any work has been done.
for tool in pdfinfo magick tesseract say; do
  if ! command -v "$tool" > /dev/null 2>&1; then
    echo "Error: required command '$tool' not found in PATH" >&2
    exit 1
  fi
done

# All outputs live next to the PDF and share its name minus the extension.
OUTPUT_DIR="${PDF_FILE%.*}_audio"
IMAGE_DIR="${PDF_FILE%.*}_images"
TEXT_DIR="${PDF_FILE%.*}_text"
PLAYLIST_FILE="${PDF_FILE%.*}.m3u"

# Create necessary directories
if ! mkdir -p "$OUTPUT_DIR" "$IMAGE_DIR" "$TEXT_DIR"; then
  echo "Error: could not create output directories" >&2
  exit 1
fi

echo "Starting conversion process for $PDF_FILE"
echo "Output directories created at $OUTPUT_DIR, $IMAGE_DIR, and $TEXT_DIR"
#######################################
# Redraw a one-line, 50-column progress bar on stdout.
# The line starts with a carriage return and has no trailing newline, so
# successive calls overwrite each other on a terminal.
# Arguments:
#   $1 - units completed so far; evaluated arithmetically, so an expression
#        such as "3+1" is accepted
#   $2 - total number of units; a missing, zero or negative total is drawn
#        as 0% instead of raising a division-by-zero error
#   $3 - label printed in front of the bar
# Outputs:
#   "\r<label>: [<bar>] <percent>%" on stdout
#######################################
update_progress_bar() {
  local title=${3-}
  local -r bar_length=50
  local current total progress filled_length bar empty

  # Evaluate the counters once, up front, so expression-style arguments keep
  # their own precedence ("3+1" means 4, not 3 + 1*100/total).
  current=$(( ${1:-0} ))
  total=$(( ${2:-0} ))

  if (( total > 0 )); then
    progress=$(( current * 100 / total ))
  else
    progress=0
  fi

  # Clamp so an out-of-range counter can never print e.g. "200%".
  if (( progress < 0 )); then
    progress=0
  elif (( progress > 100 )); then
    progress=100
  fi
  filled_length=$(( progress * bar_length / 100 ))

  # Build the "all spaces" and "all hashes" templates without subshells.
  printf -v empty "%${bar_length}s" ''
  bar=${empty// /#}

  # tput fails when there is no usable terminal (e.g. output is piped or
  # TERM is unset); that must not pollute the output or abort the caller.
  tput civis 2>/dev/null || true # Hide cursor
  printf "\r%s: [%-${bar_length}s] %d%%" \
    "$title" "${bar:0:filled_length}${empty:filled_length}" "$progress"
  tput cnorm 2>/dev/null || true # Show cursor
}
# Convert PDF to PNG images
# Every page is rendered at 300 DPI to $IMAGE_DIR/page_NNN.png (zero-based,
# zero-padded page index), one page at a time.
echo "Converting PDF to PNG images..."

# Anchor on the "Pages:" field so other pdfinfo lines that merely contain the
# word "Pages" cannot end up in the count.
page_count=$(pdfinfo "$PDF_FILE" | awk '/^Pages:/ {print $2; exit}')
case "$page_count" in
  '' | *[!0-9]*)
    echo "Error: could not determine the page count of $PDF_FILE" >&2
    exit 1
    ;;
esac
if [ "$page_count" -eq 0 ]; then
  echo "Error: $PDF_FILE contains no pages" >&2
  exit 1
fi

# Without --verbose, ImageMagick is run with -quiet.
magick_args=(-density 300)
if [ "$VERBOSE" != true ]; then
  magick_args=(-quiet "${magick_args[@]}")
fi

for ((page = 0; page < page_count; page++)); do
  output_file="$IMAGE_DIR/page_$(printf "%03d" "$page").png"
  page_label="Converting Page $((page + 1))/$((page_count)) to PNG"

  update_progress_bar "$page" "$page_count" "$page_label"

  # Run the conversion in the foreground: the OCR stage must only ever see
  # fully written images, and the exit status has to be checked.
  if ! magick "${magick_args[@]}" "${PDF_FILE}[${page}]" "$output_file"; then
    # Best effort: report the page and keep going with the remaining ones.
    printf '\nWarning: failed to convert page %d of %s\n' \
      "$((page + 1))" "$PDF_FILE" >&2
  fi

  update_progress_bar "$((page + 1))" "$page_count" "$page_label"
done

# printf, not echo: bash's echo prints "\n" literally unless -e is given.
printf '\nPDF conversion to images completed. Images saved in %s\n\n' "$IMAGE_DIR"
# Perform OCR on each image
# Each $IMAGE_DIR/page_NNN.png is passed to tesseract with the output base
# $TEXT_DIR/page_NNN; the audio stage later reads $TEXT_DIR/page_*.txt.
echo "Performing OCR on images..."

# Collect the images with a glob instead of parsing `ls`. When nothing
# matches, bash leaves the pattern as-is, so test that the first entry exists.
image_files=("$IMAGE_DIR"/*.png)
if [ ! -e "${image_files[0]}" ]; then
  echo "Error: no PNG images found in $IMAGE_DIR" >&2
  exit 1
fi

image_count=${#image_files[@]}
current_image=0
for image_file in "${image_files[@]}"; do
  base_name=$(basename "$image_file" .png)
  text_file="$TEXT_DIR/${base_name}"

  # tesseract's routine output stays suppressed, but a failed page is
  # reported so a gap in the audio can be traced back to its cause.
  if ! tesseract "$image_file" "$text_file" --psm 1 &> /dev/null; then
    printf '\nWarning: OCR failed for %s\n' "$image_file" >&2
  fi

  current_image=$((current_image + 1))
  update_progress_bar "$current_image" "$image_count" "PNG to Text OCR"
done

# printf, not echo: bash's echo prints "\n" literally unless -e is given.
printf '\nOCR completed. Text saved in %s\n\n' "$TEXT_DIR"
# Create a new playlist file
echo "Creating playlist file $PLAYLIST_FILE"
echo "#EXTM3U" > "$PLAYLIST_FILE"

# Convert each page's text to audio and add to playlist
# At most $MAX_CONCURRENT_JOBS `say` processes run at a time. Each page
# becomes $OUTPUT_DIR/page_NNN.aiff and gets one playlist entry.

# Gather the OCR output with a glob; an unmatched pattern stays literal, so
# check that the first entry really exists.
text_files=("$TEXT_DIR"/page_*.txt)
if [ ! -e "${text_files[0]}" ]; then
  text_files=()
  echo "Warning: no OCR text files found in $TEXT_DIR" >&2
fi
total_pages=${#text_files[@]}

# The playlist sits next to the audio directory, so entries are written
# relative to the playlist's own location rather than to the current working
# directory. This keeps them valid when the PDF path contains a directory
# part and when the PDF, playlist and audio folder are moved together.
audio_dir_name=${OUTPUT_DIR##*/}

current_page=0
pids=()          # PIDs of running `say` jobs, oldest first
pending_files=() # text file of each running job, same order as pids

for text_file in "${text_files[@]}"; do
  page_number=$(basename "$text_file" .txt)
  page_number=${page_number#page_}
  audio_file="$OUTPUT_DIR/page_${page_number}.aiff"

  # Throttle: once the limit is reached, reap the oldest job before starting
  # another. Removing it by position (not by pattern substitution on the PID
  # text) cannot damage other PIDs that happen to contain the same digits.
  if [ "${#pids[@]}" -ge "$MAX_CONCURRENT_JOBS" ]; then
    if ! wait "${pids[0]}"; then
      printf '\nWarning: speech synthesis failed for %s\n' "${pending_files[0]}" >&2
    fi
    pids=("${pids[@]:1}")
    pending_files=("${pending_files[@]:1}")
  fi

  # Run the say command in the background.
  # The text is read from the file (-f) instead of being passed as an
  # argument: a page starting with "-" would be parsed as an option, and a
  # long page could exceed the argument-length limit.
  say -f "$text_file" -o "$audio_file" &
  pids+=($!)
  pending_files+=("$text_file")
  current_page=$((current_page + 1))

  # Progress counts finished pages: started minus still in flight.
  update_progress_bar "$((current_page - ${#pids[@]}))" "$total_pages" \
    "Text to Audio Conversion"

  # Add audio file to playlist
  printf '%s\n' "#EXTINF:-1,Page $page_number" >> "$PLAYLIST_FILE"
  printf '%s\n' "$audio_dir_name/page_${page_number}.aiff" >> "$PLAYLIST_FILE"
done

# Wait for all background processes to complete, reporting any that failed.
for i in "${!pids[@]}"; do
  if ! wait "${pids[$i]}"; then
    printf '\nWarning: speech synthesis failed for %s\n' "${pending_files[$i]}" >&2
  fi
done

# Skip the final redraw when there was nothing to convert.
if [ "$total_pages" -gt 0 ]; then
  update_progress_bar "$total_pages" "$total_pages" "Text to Audio Conversion"
fi

# printf, not echo: bash's echo prints "\n" literally unless -e is given.
printf '\nConversion complete. Playlist created: %s\n\n' "$PLAYLIST_FILE"