-
Notifications
You must be signed in to change notification settings - Fork 2
/
vm
231 lines (200 loc) · 11.6 KB
/
vm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
#!/usr/bin/zsh
# Should work also for users of bash, replace the above shebang with the path to 'bash'
# MIT License
# Copyright (c) 2023 Quantius Benignus
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#--------------------------------------------------------------------------
# NAME: vm (named for brevity and quick access, check your PATH for conflicts and rename accordingly if needed.)
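# PREREQUISITES:
# - Joplin desktop installation with WebClipper activated (see https://joplinapp.org/)
# - whisper.cpp installation (see https://github.com/ggerganov/whisper.cpp)
# - recent versions of 'sox', 'curl', 'xsel' command line utilities from your system's repositories.
# - the GNU enhanced 'getopt' from util-linux (used for option parsing).
# - 'jq', needed only when collecting stored to-do notes ('*td_uncollected.json' files).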
#--------------------------------------------------------------------------
# Verify that every external tool the script calls can be found in PATH.
# The first missing tool is reported on stderr and the script exits with status 1.
for tool in sox curl xsel; do
  if ! command -v "$tool" &>/dev/null; then
    echo "$tool is required. Please, install $tool" >&2
    exit 1
  fi
done
# 'transcribe' is expected to be a symbolic link to the whisper.cpp main executable.
if ! command -v transcribe &>/dev/null; then
  echo -e "Please, install whisper.cpp (see https://github.com/ggerganov/whisper.cpp)\nand create 'transcribe' in your PATH as a symbolic link to the main executable, e.g.\n 'ln -s /full/path/to/whisper.cpp/main \$HOME/bin/transcribe'" >&2
  exit 1
fi
# From here on, abort as soon as any of the tools reports a failure:
set -e
#--------------------------------------------------------------------------
# USER CONFIGURATION BLOCK.
# Adjust the variables below to match your environment.

# Directory for temporary files; a RAM-backed location avoids disk writes.
TEMPD=/dev/shm

# whisper.cpp model file used for inference unless an option selects another one:
model="${TEMPD}/ggml-tiny.en.bin"
#model="${TEMPD}/ggml-base.bin"

# Where transcribed notes are parked while Joplin is not running (any writable
# directory will do); they are picked up on a later run.
JOPLIND="${HOME}/.config/joplin-desktop/resources"

# Fixed name of the temporary wav file holding the voice memo; it is overwritten
# on every run. Alternative with a unique name:
#[ -v $ramf ] || ramf="$(mktemp -p /dev/shm/ vmXXXX)"
ramf="${TEMPD}/vmfile"

# Number of processing threads passed to whisper.cpp (adjust for your machine).
NTHR=4
# The optimum seems to be one thread per physical CPU core:
#NTHR="$(( $(lscpu | awk '/^Socket\(s\)/{ print $2 }') * $(lscpu | awk '/^Core\(s\) per socket/{ print $4 }') ))"

# Joplin data API settings.
# NOTEBOOK_ID is the id of the Joplin notebook that receives new notes/tasks
# (the author's notebook is named VoiceMemos).
# AUTH_TOKEN is generated by the web clipper plugin in Joplin (REST data API server).
# REPLACE BOTH WITH THE VALUES FROM YOUR OWN JOPLIN DESKTOP INSTANCE:
NOTEBOOK_ID='cf67deca92......'
AUTH_TOKEN='dfc7656d765ad7577877b7d7f7e759a9659769b99c99ff9587e96d..........'
# END OF USER CONFIGURATION BLOCK
#--------------------------------------------------------------------------
# Runtime flags, both start out unset:
#   clip  - when set, the output goes to the clipboard (for insertion as inline to-do)
#   apiup - set once the Joplin data API has answered a ping
unset clip apiup
# Print the usage text to stdout and terminate the script with status 0.
show_help() {
  # One entry per echo of the help text; embedded '\n' escapes are expanded by
  # printf's %b, which also terminates every entry with a newline.
  # Not printed: " using curl to access Joplin's data API (see details below)"
  # Not printed: "(tested on Ubuntu 22.04 LTS under Gnome version 42.5 (Wayland and X11))"
  local -a help_lines
  help_lines=(
    "\n .~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~."
    " | vm - Use speech to create a Joplin note or transcribe to clipboard. |"
    " | A showcase of the built-in power of the Linux command line. |"
    " *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*\n"
    " Records a voice memo from the default input channel (microphone),"
    " or uses one or multiple audio files as the input."
    " Transcribes each input (using whisper.cpp, a C/C++ port of OpenAI's Whisper engine) into text"
    " formated as a note entry for the desktop version of the Joplin note-taking app and \n either:"
    " - sends it to the clipboard for insertion in an existing Joplin note as inline text"
    " - or creates a new note in a running instance of the Joplin app"
    " - if Joplin is not running, stores the transcription in a file for later collection\n"
    " SYNOPSIS: vm [-b|-c|-bc|-cb|--help|-h] ... [filename(s)]\n"
    " - 'vm' the default: use the 'tiny' whisper.cpp ASR model file and create a note in Joplin"
    " - 'vm -h|--help' will print this text"
    " - 'vm -b|--base' transcribes to a Joplin note using the larger (more accurate but slower) 'base' model"
    " - 'vm -c|--clip' will transcribe with the default 'tiny' model file but output to the clipboard"
    " - 'vm -bc' , 'vm -cb', etc. - valid compound options are acceptable"
    " - any and all non-option arguments are treated as input audio files to be converted\n"
  )
  printf '%b\n' "${help_lines[@]}"
  exit 0
}
# Option parsing relies on the GNU enhanced getopt from util-linux (on macOS it
# may need to be installed manually over the existing one).
# Uncomment to test your getopt compliance in the unlikely case of parsing issues:
#getopt --test > /dev/null
#if [[ $? -ne 4 ]]; then
# echo "I'm sorry, 'getopt --test' failed in this environment."
# exit 1
#fi
SHORT=hcb # Short options
LONG=help,clip,base # Long options
# Input audio files named on the command line (filled by parse_cli_args):
audio_files=()

# Parse the command line.
# Globals:   SHORT, LONG, TEMPD (read); GOTOPTS, clip, model, audio_files (written)
# Arguments: the script's command-line arguments ("$@")
# Exits:     2 when getopt rejects the arguments; -h/--help exits through show_help
parse_cli_args() {
  # getopt advanced mode - see /usr/share/docs/util-linux/ for details.
  # The failure is handled on the same command line: with 'set -e' active a
  # separate '$?' check would never be reached.
  # ZSH_ARGZERO keeps the script name in getopt's messages when zsh reports the
  # function name as $0.
  GOTOPTS=$(getopt --options "${SHORT}" --longoptions "${LONG}" \
    --name "${ZSH_ARGZERO:-$0}" -- "$@") || exit 2
  eval set -- "${GOTOPTS}" # eval re-splits getopt's quoted output safely
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help) show_help ;;
      # Clipboard mode also clears the clipboard before anything is appended:
      -c|--clip) clip=1; xsel -cb ;;
      -b|--base) model="$TEMPD/ggml-base.en.bin" ;;
      --) ;;
      # Every non-option argument is an input audio file:
      *) audio_files+=("$1") ;;
    esac
    shift
  done
}
parse_cli_args "$@"
# The model files (e.g. ggml-tiny.en.bin, see the option parser above) must already be in the tmpfs TEMPD in memory.
# This can be done once per shell session from your .zshrc (or .bashrc) file by placing something like this in it:
# ([ -f /dev/shm/ggml-tiny.en.bin ] || cp /path/to/your/local/whisper.cpp/models/ggml* /dev/shm/)
# If you prefer to set the above on login (even before opening a terminal), place it in your .profile file.
# Abort early when the selected model cannot be read; the diagnostic goes to
# stderr like the other fatal messages of this script.
if [[ ! -r $model ]]; then
  echo "Model file not found or not readable!" >&2
  exit 1
fi
# Convert one input audio file into the wav format used for transcription
# (mono, 16 kHz, normalised) and queue the result for transcription.
# Globals:   TEMPD (read), wavs (appended)
# Arguments: $1 - path of the audio file to convert
# Exits:     1 (after printing the help text to stderr) when the file is unreadable
load_audio_file() {
  local src="$1" fname fbname
  if [[ ! -r $src ]]; then
    printf '>>>>>>>>>>>>>>>>\n File %s cannot be read !!!\n>>>>>>>>>>>>>>>\n\n' "$src" >&2
    # show_help ends with 'exit 0'; running it in a subshell lets this error
    # path finish with a failure status instead.
    ( show_help ) >&2 || true
    exit 1
  fi
  printf '>>>>>>>>>> Loading audio from file...%s >>>>>>>>>>\n\n' "$src"
  # Here we are tasking sox with error handling:
  fname=$(basename -- "$src")
  fbname="$TEMPD/${fname%.*}.wav"
  sox -q "$src" -t wav "$fbname" channels 1 rate 16k norm
  wavs+=("$fbname")
}

# The count guard keeps the loop from running at all when no files were given
# (audio_files may not even be set in that case).
if [[ ${#audio_files[@]} -gt 0 ]]; then
  for af in "${audio_files[@]}"; do
    load_audio_file "$af"
  done
fi
# No input files were converted: record a voice memo from the microphone instead.
if [[ ${#wavs[@]} -eq 0 ]]; then
  echo -e '>>>>>>>>>>>>>>>> Speak in the microphone...>>>>>>>>>>>>>>>\n'
  # sox typically requires user input (CTRL-C) to stop recording from the
  # microphone; the trap prevents the script from "overreacting" to it:
  trap "echo -e '>>>>>>>>>>>>>>>> Done recording >>>>>>>>>>>>>>>\n'" SIGINT
  # Recording in wav format at 16k rate, the only one currently accepted by whisper.cpp.
  # The silence effect starts recording once 0.1s of sound above the 3% threshold
  # is heard and stops after 3.6s below 3%; if in doubt press CTRL-C.
  # Alternative: rec -t wav $ramf rate 16k channels 1 silence -l 0 1 2.0 1%
  rec -q -t wav "$ramf" rate 16k channels 1 silence 1 0.1 3% 1 3.6 3%
  # Recording is over: restore the default CTRL-C behaviour so that a later
  # interrupt does not print "Done recording".
  trap - SIGINT
  wavs+=("$ramf")
fi
# Probe the Joplin REST API on localhost:41184. 'apiup' is set only when the
# expected reply arrives; otherwise the JSON payloads are stored for later.
# The curl call stays inside the test so that a failed connection is not fatal.
if [[ "$(curl --silent http://localhost:41184/ping)" == 'JoplinClipperServer' ]]; then
  apiup="1"
fi
# Escape a string so that it can be embedded inside a JSON string literal.
# Arguments: $1 - raw text
# Outputs:   the escaped text on stdout, without a trailing newline
json_escape() {
  local s="$1"
  s=${s//\\/\\\\}    # backslashes first, so later escapes are not doubled
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

# Transcribe every queued wav file and deliver the text either to the clipboard
# or to Joplin (directly through the data API, or as a stored JSON payload).
for wf in "${wavs[@]}"; do
  echo -e '>>>>>>>>>>>>>>>> Now transcribing... >>>>>>>>>>>>>>>\n'
  # transcribe is a symbolic link (somewhere in your path) to the compiled "main" executable in the whisper.cpp directory.
  # For example: in your ~/bin> create it with `ln -s /full/path/to/whisper.cpp/main $HOME/bin/transcribe`
  # The transcribed text is stored in a file (-otxt) next to the wav, e.g. /dev/shm/vmfile.txt
  transcribe -pc -nt -t "$NTHR" -m "$model" -f "$wf" -otxt 2>/dev/null
  # For tracking the time of creation of notes:
  noted="$(date +%Y-%m-%dT%T)" # ISO-8601-ish.
  # If your computer is somehow faster than 1-transcription/sec use: %Y-%m-%dT%T.%2N above
  if [[ $clip ]]; then
    # Clipboard flag set: append a short header plus the transcription to the clipboard.
    { printf '>Created on %s:\n>' "$noted"; cat -- "${wf}.txt"; } | xsel -ab
    echo -e ">>>>>>>>>>>>>>> Appended to clipboard! \n"
  else
    # Otherwise we use the Joplin data API, with a JSON payload prepared in a
    # temp file in memory.
    uct="$(date +%s%3N)"
    str="$(< "${wf}.txt")"
    # Drop non-speech events reported by Whisper, such as (wind blowing) or [MUSIC].
    # NOTE: each expansion removes from the first opening to the last closing bracket.
    str="${str/\(*\)}"
    str="${str/\[*\]}"
    # parent_id is the Joplin id of the target notebook. The transcription is
    # escaped so that quotes, backslashes and line breaks cannot break the JSON.
    printf '{ "title": "%s", "parent_id":"%s", "user_created_time":"%s", "body": "## %s \\n---\\n> %s\\n\\n "}' \
      "$noted" "$NOTEBOOK_ID" "$uct" "$noted" "$(json_escape "$str")" > "$TEMPD/vmjson"
    if [[ $apiup ]]; then
      # The REST API is available: post the prepared payload with curl.
      curl -d @"$TEMPD/vmjson" -o "$TEMPD/vm-curlout" --no-progress-meter "http://localhost:41184/notes?token=$AUTH_TOKEN"
      echo -e '\n-------------------- New note created! -----------------------'
      # You should find the newly created note in the notebook of your choice.
    else
      # The API did not answer: keep the payload for collection on a later run.
      pending="$JOPLIND/${noted}n_uncollected.json"
      cp -- "$TEMPD/vmjson" "$pending"
      printf '\nIt seems that Joplin is not runnig! \nMemo saved for later in %s\n' "$pending"
    fi
  fi
done
# Send the notes that were stored while Joplin was not running to the data API
# and rename each delivered file from '*_uncollected.json' to '*.json'.
# Nothing is done when the API is down or when clipboard mode is active.
# Globals: apiup, clip, JOPLIND, TEMPD, AUTH_TOKEN (read)
collect_pending_notes() {
  local jsonf fil adate url
  if [[ $apiup ]] && [[ $clip -ne 1 ]]; then
    # An unmatched pattern must expand to nothing. Each shell has its own
    # switch for that; the zsh-only '(N)' glob qualifier does not parse in bash.
    if [[ -n ${ZSH_VERSION:-} ]]; then
      setopt null_glob
    elif [[ -n ${BASH_VERSION:-} ]]; then
      shopt -s nullglob
    fi
    for jsonf in "$JOPLIND"/*_uncollected.json; do
      fil="${jsonf##*/}"
      printf '\n----- Collecting note from %s ------\n\n' "$fil"
      curl -d @"$jsonf" -o "$TEMPD/${fil}-curlout" --no-progress-meter "http://localhost:41184/notes?token=$AUTH_TOKEN"
      if [[ "$fil" == *"td_uncollected.json" ]]; then
        # To-do payload: read the alarm timestamp from the stored file and set
        # it on the note that was just created (its id is in curl's reply).
        adate=$(jq -j '.todo_due' "$jsonf")
        url="http://localhost:41184/notes/$(jq -j '.id' "$TEMPD/${fil}-curlout")?token=$AUTH_TOKEN"
        curl -X PUT -o "$TEMPD/${fil}PUT-curlout" --no-progress-meter --data '{"todo_due": "'"$adate"'"}' "$url"
      fi
      # The file is not removed; it is kept as a backup under its new name.
      mv -i -- "$jsonf" "${jsonf%_uncollected.json}.json"
      printf '\n----- Collected note. Backup kept as %s ------\n' "${fil%_uncollected.json}.json"
    done
  fi
}
collect_pending_notes