From ed5353d422bdf0916b40486c4043d54845b6a1b4 Mon Sep 17 00:00:00 2001
From: marco-lancini
Date: Thu, 21 Sep 2023 10:12:49 +0100
Subject: [PATCH] wip

---
Notes (text in this section is ignored by git am):
  * Dockerfile: copies docker/pandoc/filters/* into /root/.pandoc/filters/.
  * wordcount.lua: new pandoc Lua filter that prints the body word count
    and exits. All state and helpers are file-local; only Pandoc is global.
  * NOTE(review): this patch was re-flowed and edited by hand. The blob id
    on the wordcount.lua "index" line was not recomputed, and the leading
    whitespace of the "&& rm -f" context line is a best guess; confirm
    against the target Dockerfile (or apply with --ignore-whitespace).

 docker/pandoc/Dockerfile            |  4 ++++
 docker/pandoc/filters/wordcount.lua | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)
 create mode 100644 docker/pandoc/filters/wordcount.lua

diff --git a/docker/pandoc/Dockerfile b/docker/pandoc/Dockerfile
index cebfeb3..45b2494 100644
--- a/docker/pandoc/Dockerfile
+++ b/docker/pandoc/Dockerfile
@@ -13,6 +13,10 @@ COPY docker/pandoc/requirements.txt /requirements.txt
 RUN pip3 install --no-cache-dir -r /requirements.txt \
     && rm -f /requirements.txt
 
+# Install custom filters
+COPY docker/pandoc/filters/* /root/.pandoc/filters/
+
+# Set working directory
 WORKDIR /data
 
 # Command
diff --git a/docker/pandoc/filters/wordcount.lua b/docker/pandoc/filters/wordcount.lua
new file mode 100644
index 0000000..bcaf9ac
--- /dev/null
+++ b/docker/pandoc/filters/wordcount.lua
@@ -0,0 +1,41 @@
+-- Pandoc Lua filter: counts the words in a document body.
+--
+-- Prints "<n> words in body" to stdout, then terminates the process with
+-- os.exit(0), so no converted document is written.
+
+-- Running total, updated by the handlers below.
+local words = 0
+
+-- Adds the number of whitespace-separated tokens in `text` to the total.
+local function count_tokens(text)
+  -- The second value returned by gsub is the number of matches.
+  local _, n = text:gsub("%S+", "")
+  words = words + n
+end
+
+-- Handlers keyed by pandoc element type.
+local wordcount = {
+  Str = function(el)
+    -- we don't count a word if it's entirely punctuation:
+    if el.text:match("%P") then
+      words = words + 1
+    end
+  end,
+
+  Code = function(el)
+    count_tokens(el.text)
+  end,
+
+  CodeBlock = function(el)
+    count_tokens(el.text)
+  end
+}
+
+-- Filter entry point, called by pandoc with the whole document.
+function Pandoc(el)
+  -- skip metadata, just count body:
+  el.blocks:walk(wordcount)
+  print(words .. " words in body")
+  -- Exit here: only the count is wanted, not a converted document.
+  os.exit(0)
+end