vm

#!/usr/bin/zsh
# Should work also for users of bash, replace the above shebang with the path to 'bash'

# MIT License

# Copyright (c) 2023 Quantius Benignus

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#--------------------------------------------------------------------------

# NAME: vm (named for brevity and quick access, check your PATH for conflicts and rename accordingly if needed.) 

# PREREQUISITES: 
#      - Joplin desktop installation with WebClipper activated (see https://joplinapp.org/)   
#      - whisper.cpp installation (see https://github.com/ggerganov/whisper.cpp) 
#      - recent versions of 'sox', 'curl', 'xsel' command line utilities from your system's repositories.
#      - 'jq', needed only when stored to-do entries (*td_uncollected.json) are collected.
#--------------------------------------------------------------------------

# Make sure every external tool used below can be found in PATH; bail out otherwise.
for required_tool in sox curl xsel; do
    if ! command -v "$required_tool" &>/dev/null; then
        echo "$required_tool is required. Please, install $required_tool" >&2
        exit 1
    fi
done
# 'transcribe' is not a packaged tool: it is the user's own symlink to whisper.cpp's main executable.
if ! command -v transcribe &>/dev/null; then
    echo -e "Please, install whisper.cpp (see https://github.com/ggerganov/whisper.cpp)\nand create 'transcribe' in your PATH as a symbolic link to the main executable, e.g.\n  'ln -s /full/path/to/whisper.cpp/main \$HOME/bin/transcribe'" >&2
    exit 1
fi
# From this point on, stop as soon as one of the tools above reports a failure:
set -e

#--------------------------------------------------------------------------
#USER CONFIGURATION BLOCK. 
#Please, adjust the variables here to suit your environment:

# Temp files are kept in memory, for speed and to reduce SSD/HDD "grinding":
TEMPD=/dev/shm
# whisper.cpp model file used for inference unless an option selects another one:
model="${TEMPD}/ggml-tiny.en.bin"
#model="${TEMPD}/ggml-base.bin"
# Directory where transcribed notes wait for later processing while Joplin is not running
# (the Joplin config directory by default, but any other place will do):
JOPLIND="${HOME}/.config/joplin-desktop/resources"
#[ -v $ramf ] || ramf="$(mktemp -p /dev/shm/ vmXXXX)"
# Fixed-name temp wav file (in RAM) holding the voice memo; it is overwritten on every run:
ramf="${TEMPD}/vmfile"

# Number of processing threads for whisper.cpp inference (adjust for your case):
NTHR=4
# The optimum number of transcribe threads seems to equal the number of CPU processing cores:
#NTHR="$(( $(lscpu | awk '/^Socket\(s\)/{ print $2 }') * $(lscpu | awk '/^Core\(s\) per socket/{ print $4 }') ))"

# Settings for the Joplin data API.
# NOTEBOOK_ID is the id of the Joplin notebook in which the new note/task will be created
# (in the author's case a notebook named VoiceMemos).
# AUTH_TOKEN is generated by the web clipper plugin in Joplin (REST data API server).
# PLEASE, REPLACE BOTH WITH THE VALUES FROM YOUR OWN INSTANCE OF THE JOPLIN DESKTOP APP FOR LINUX:
NOTEBOOK_ID='cf67deca92......'
AUTH_TOKEN='dfc7656d765ad7577877b7d7f7e759a9659769b99c99ff9587e96d..........'

#END OF USER CONFIGURATION BLOCK
#--------------------------------------------------------------------------
# Run-time flags, cleared here so that they start out unset:
#   clip  - clipboard flag; when set the output goes to the clipboard (for insertion as inline to-do)
#   apiup - set when the Joplin data API is up
unset clip apiup

#######################################
# Print the usage text to stdout and terminate the script.
# Arguments:
#   $1 - optional exit status (default 0), so that callers on an error
#        path can show the help without reporting success.
# Outputs:
#   The help text on stdout.
# Returns:
#   Never returns; exits with the requested status.
#######################################
show_help() {
echo -e "\n   .~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~."
echo -e "   |  vm - Use speech to create a Joplin note or transcribe to clipboard. |"
echo -e "   |      A showcase of the built-in power of the Linux command line.     |"
echo -e "   *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*\n"
echo " Records a voice memo from the default input channel (microphone),"
echo "  or uses one or multiple audio files as the input." 
echo " Transcribes each input (using whisper.cpp, a C/C++ port of OpenAI's Whisper engine) into text"
echo -e " formated as a note entry for the desktop version of the Joplin note-taking app and \n either:" 
echo "   - sends it to the clipboard for insertion in an existing Joplin note as inline text"
echo "   - or creates a new note in a running instance of the Joplin app"
#    "     using curl to access Joplin's data API (see details below)"
echo -e "   - if Joplin is not running, stores the transcription in a file for later collection\n" 

echo -e " SYNOPSIS: vm [-b|-c|-bc|-cb|--help|-h] ... [filename(s)]\n"

echo "   - 'vm' the default: use the 'tiny' whisper.cpp ASR model file and create a note in Joplin" 
echo "   - 'vm -h|--help' will print this text" 
echo "   - 'vm -b|--base' transcribes to a Joplin note using the larger (more accurate but slower) 'base' model"
echo "   - 'vm -c|--clip' will transcribe with the default 'tiny' model file but output to the clipboard"
echo "   - 'vm -bc' , 'vm -cb', etc. - valid compound options are acceptable"
echo -e "   - any and all non-option arguments are treated as input audio files to be converted\n"
#    "(tested on Ubuntu 22.04 LTS under Gnome version 42.5 (Wayland and X11))"
# Plain 'show_help' keeps the historical behaviour (exit 0).
exit "${1:-0}"
}

#The GNU enhanced getopt from util-linux is required (on macOS it may need to be installed manually over the existing one).
#Uncomment the lines below to test your getopt compliance in the unlikely case of parsing issues:
#getopt --test > /dev/null
#if [[ $? -ne 4 ]]; then
#    echo "I'm sorry, 'getopt --test' failed in this environment."
#    exit 1
#fi

SHORT=hcb                # Short options
LONG=help,clip,base      # Long options
# getopt advanced mode - see /usr/share/docs/util-linux/ for details 
# Arguments passed via   -- "$@" for correct parsing.
# Invalid arguments end the script with status 2. The check is attached to the
# assignment itself: with 'set -e' in effect a failed assignment would abort
# the script before a separate test of $? could run.
GOTOPTS=$(getopt --options "${SHORT}" --longoptions "${LONG}" --name "$0" -- "$@") || exit 2

eval set -- "${GOTOPTS}" #eval for safe quoting

# Input audio files named on the command line (every non-option argument).
audio_files=()

while (( $# > 0 )); do
    case "$1" in
        -h|--help) show_help ;;
        # xsel -cb is run as soon as the flag is seen; transcriptions are appended with 'xsel -ab' later on.
        -c|--clip) clip=1 ; xsel -cb ;;
        -b|--base) model="$TEMPD/ggml-base.en.bin" ;;
        # getopt's separator between options and operands carries no information here.
        --) ;;
        *) audio_files+=("$1") ;;
    esac
    shift
done

# You must have moved the model files (e.g. ggml-tiny.en.bin, see case parser above) to the tmpfs TEMPD in memory:
# This can be done once per shell session using your .zshrc (or .bashrc) file by placing something like this in it:
# ([ -f /dev/shm/ggml-tiny.en.bin ] || cp /path/to/your/local/whisper.cpp/models/ggml* /dev/shm/)
# If you prefer to set the above on login (even before opening a terminal), place it in your .profile file.
if [[ ! -r "$model" ]]; then
    # Diagnostics belong on stderr; naming the file tells the user which model is missing.
    echo "Model file '$model' not found or not readable!" >&2
    exit 1
fi

# Convert every input audio file into a mono, 16k-rate, normalised wav file in TEMPD
# and remember the converted files in 'wavs' for the transcription step.
wavs=()
for af in "${audio_files[@]}"; do
    if [[ ! -r "$af" ]]; then
        printf '>>>>>>>>>>>>>>>>\n File %s cannot be read !!!\n>>>>>>>>>>>>>>>\n\n' "$af" >&2
        # show_help ends with 'exit 0'; running it in a subshell keeps that exit
        # local, so this error path can still finish with a failure status.
        ( show_help ) >&2
        exit 1
    fi
    printf '>>>>>>>>>> Loading audio from file...%s >>>>>>>>>>\n\n' "$af"
    # Here we are tasking sox with error handling:
    fname="${af##*/}"                 # file name without its directory part
    fbname="$TEMPD/${fname%.*}.wav"   # same name in TEMPD, extension replaced by .wav
    sox -q "$af" -t wav "$fbname" channels 1 rate 16k norm
    wavs+=("$fbname")
done

# No input file was converted: record a voice memo from the microphone instead.
if (( ${#wavs[@]} == 0 )); then
    echo -e '>>>>>>>>>>>>>>>> Speak in the microphone...>>>>>>>>>>>>>>>\n'
    # sox typically requires user input (CTRL-C) to stop recording from the microphone,
    # this prevents the script from "overreacting":
    trap "echo -e '>>>>>>>>>>>>>>>> Done recording >>>>>>>>>>>>>>>\n'" SIGINT
    # Recording in wav format at 16k rate, the only one currently accepted by whisper.cpp.
    # Recording starts at 0.1s of sound above a 3% threshold and attempts to stop on
    # 3.6s of silence below 3%; if in doubt press CTRL-C:
    # rec -t wav $ramf rate 16k channels 1 silence -l  0   1 2.0 1%
    rec -q -t wav "$ramf" rate 16k channels 1 silence 1 0.1 3% 1 3.6 3%
    # The handler is only meant for the recording; restore the default so that
    # CTRL-C can abort the transcription and upload steps that follow.
    trap - SIGINT
    wavs+=("$ramf")
fi

# Ask the Joplin clipper server whether its REST API is reachable. 'apiup' is set only
# when the expected answer comes back; otherwise the json payload is stored for later.
# The curl call stays inside the test so that a refused connection cannot stop the script.
if [[ "$(curl --silent http://localhost:41184/ping)" == 'JoplinClipperServer' ]]; then
    apiup="1"
fi

#######################################
# Remove every "(...)" and "[...]" group from a text. Whisper marks detected
# non-speech events this way, e.g. "(wind blowing)" or "[MUSIC]".
# Each group is removed on its own (shortest match), so speech between two
# groups is preserved.
# Arguments: $1 - the text to clean
# Outputs:   the cleaned text on stdout (no trailing newline added)
#######################################
vm_strip_events() {
    local text=$1 rest
    while [[ $text == *\(*\)* ]]; do
        rest=${text#*\(}                  # everything after the first "("
        text=${text%%\(*}${rest#*\)}      # text before it + text after the matching ")"
    done
    while [[ $text == *\[*\]* ]]; do
        rest=${text#*\[}
        text=${text%%\[*}${rest#*\]}
    done
    printf '%s' "$text"
}

#######################################
# Escape a text so that it can be placed inside a JSON string literal.
# Handles backslash, double quote, newline, carriage return and tab;
# other control characters are passed through unchanged.
# Arguments: $1 - the raw text
# Outputs:   the escaped text on stdout (no trailing newline added)
#######################################
vm_json_escape() {
    local s=$1
    s=${s//\\/\\\\}       # backslashes first, so later escapes are not doubled
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/\\r}
    s=${s//$'\t'/\\t}
    printf '%s' "$s"
}

# Transcribe every wav file and deliver the text to the clipboard, to Joplin, or to a file.
for wf in "${wavs[@]}"; do
    echo -e '>>>>>>>>>>>>>>>> Now transcribing... >>>>>>>>>>>>>>>\n'
    # transcribe is a symbolic link (somewhere in your path) to the compiled "main" executable in the whisper.cpp directory.
    # For example: in your ~/bin> create it with `ln -s /full/path/to/whisper.cpp/main $HOME/bin/transcribe`
    # The transcribed text is stored in a file (-otxt), in this case /dev/shm/vmfile.txt
    transcribe -pc -nt -t "$NTHR" -m "$model" -f "$wf" -otxt 2>/dev/null

    # For tracking the time of creation of notes:
    noted="$(date +%Y-%m-%dT%T)" #ISO-8601-ish.
    #If your computer is somehow faster than 1-transcription/sec use: %Y-%m-%dT%T.%2N above

    # If clipboard flag is set, we dump everything in the clipboard (make sure xsel is installed, e.g. apt install xsel):
    if [[ $clip ]]; then
        { printf '>Created on %s:\n>' "$noted"; cat "${wf}.txt"; } | xsel -ab
        echo -e ">>>>>>>>>>>>>>> Appended to clipboard! \n"
        continue
    fi

    # Otherwise we will use the Joplin data API.
    # Let's prepare a JSON payload (in a temp file in memory) to pass to curl.
    # parent_id is the Joplin id of the target notebook (VoiceMemos in the author's case).
    uct="$(date +%s%3N)"
    str="$(< "${wf}.txt")"
    # Whisper detected non-speech events such as (wind blowing):
    str="$(vm_strip_events "$str")"
    # printf with %s (instead of echo -e) writes the text verbatim; the escaping keeps
    # the payload valid JSON, which jq also needs when retrieving uncollected notes.
    printf '{ "title": "%s", "parent_id":"%s", "user_created_time":"%s", "body": "## %s  \\n---\\n> %s\\n\\n "}' \
        "$noted" "$NOTEBOOK_ID" "$uct" "$noted" "$(vm_json_escape "$str")" > "$TEMPD/vmjson"

    # Is the REST API available? We contact it with curl using the prepared JSON payload
    # in the file "vmjson". --fail makes curl report HTTP errors, so a request that Joplin
    # rejects is not announced as a new note.
    if [[ $apiup ]] && curl --fail -d @"$TEMPD/vmjson" -o "$TEMPD/vm-curlout" --no-progress-meter "http://localhost:41184/notes?token=$AUTH_TOKEN"; then
        echo -e '\n--------------------  New note created!  -----------------------'
        # You should find the newly created note in the notebook of your choice.
    else
        # Keep the payload for later collection.
        saved="$JOPLIND/${noted}n_uncollected.json"
        cp "$TEMPD/vmjson" "$saved"
        if [[ $apiup ]]; then
            printf '\nJoplin did not accept the note! \nMemo saved for later in %s\n' "$saved" >&2
        else
            printf '\nIt seems that Joplin is not running! \nMemo saved for later in %s\n' "$saved"
        fi
    fi
done

# Let's check for uncollected tasks (from when Joplin was not running) and take them home.
# Skipped when the API is down or when the output went to the clipboard.
if [[ $apiup && -z $clip ]]; then
    # A pattern without matches must expand to nothing instead of raising an error
    # (zsh) or staying literal (bash); the option is spelled differently in each shell.
    if [[ -n ${ZSH_VERSION:-} ]]; then
        setopt null_glob
    else
        shopt -s nullglob
    fi
    for jsonf in "$JOPLIND"/*_uncollected.json; do
        fil="${jsonf##*/}"
        printf '\n----- Collecting note from %s ------\n\n' "$fil"
        # --fail makes curl report HTTP errors; a note Joplin did not accept keeps
        # its "_uncollected" name and is tried again on the next run.
        if ! curl --fail -d @"$jsonf" -o "$TEMPD/${fil}-curlout" --no-progress-meter "http://localhost:41184/notes?token=$AUTH_TOKEN"; then
            printf '\n----- Could not collect %s, keeping it for the next run ------\n' "$fil" >&2
            continue
        fi
        if [[ "$fil" == *"td_uncollected.json" ]]; then
            # To-do entries: read the stored alarm timestamp and set it on the note just
            # created, whose id is taken from the saved reply of the POST request above.
            adate=$(jq -j '.todo_due' "$jsonf")
            url="http://localhost:41184/notes/$(jq -j '.id' "$TEMPD/${fil}-curlout")?token=$AUTH_TOKEN"
            curl -X PUT -o "$TEMPD/${fil}PUT-curlout" --no-progress-meter --data '{"todo_due": "'"$adate"'"}' "$url"
        fi
        #rm $jsonf  - no, let's keep them just in case
        mv -i "$jsonf" "${jsonf%_uncollected.json}.json"
        printf '\n----- Collected note. Backup kept as %s ------\n' "${fil%_uncollected.json}.json"
    done
fi