forked from snowplow/documentation
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimport.sh
executable file
·74 lines (65 loc) · 2.67 KB
/
import.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env bash
#
# Convert a WordPress export into Markdown.
#
# Usage: import.sh <wordpress-export.xml>
#
# The export is cleaned up with sed and xq, converted by
# wordpress-export-to-markdown/index.js, post-processed, and the result is
# left in /tmp/docs. Also uses sponge for in-place rewrites.

# Scratch paths; both are printed so a run can be inspected while in progress.
tmpexport=
tmpoutput=

# Remove both scratch paths. A single handler is used because every call to
# `trap ... EXIT` replaces the previous EXIT handler instead of adding to it.
cleanup() {
  if [[ -n "$tmpexport" ]]; then
    rm -f -- "$tmpexport"
  fi
  if [[ -n "$tmpoutput" ]]; then
    rm -rf -- "$tmpoutput"
  fi
}
trap cleanup EXIT
# Stop on Ctrl-C instead of carrying on with the next command; exiting also
# fires the EXIT handler above.
trap 'exit 130' INT

tmpexport=$(mktemp /tmp/wordpress-export.XXXXXX) || exit 1
printf '%s\n' "$tmpexport"
tmpoutput=$(mktemp -d /tmp/markdown-import.XXXXXX) || exit 1
printf '%s\n' "$tmpoutput"
# Some clean-up to make the output of wordpress-export-to-markdown better.
#
# Reads the WordPress export named by the first argument (standard input when
# no argument is given), rewrites the staging host name to the public one, and
# pipes the XML through xq, which applies the jq program below and writes XML
# back out (-x) into $tmpexport.
sed 's/snowplow-docs\.site\.strattic\.io/docs.snowplowanalytics.com/g' \
  -- "${1:-/dev/stdin}" \
  | xq -x '
  # Index every item by post id so block references can be resolved to names.
  INDEX(.rss.channel.item[]; .["wp:post_id"]) as $posts |
  .rss.channel.item |= map(
    # discard drafts
    select(.["wp:status"] | . == "publish" or . == "inherit") |
    # keep only items whose modification timestamp sorts after this date
    select(.["wp:post_modified"] > "2022-07-28") |
    if .["wp:post_type"] == "docs" then
      .["wp:post_name"] = (
        # copy the page path to the name so that the pages are organized in a tree
        .link | gsub("https://docs.snowplowanalytics.com/docs/"; "") |
        rtrimstr("/")
      ) |
      if .["wp:menu_order"] then
        .["wp:menu_order"] |= (tonumber * 10)
      else . end |
      if .["content:encoded"] then
        .["content:encoded"] |= (
          # make sure preformatted code will be fenced with ```
          gsub("(?<x><pre[^>]+preformatted[^>]+>)"; "\(.x)<code>") |
          gsub("</pre>"; "</code></pre>") |
          # substitute blocks: each reusable-block reference becomes an MDX
          # import of that block plus an element that renders it
          gsub(
            "<!-- wp:block \\{\"ref\":(?<ref>[0-9]+)\\} /-->";
            "<pre><code class=\"language-mdx-code-block\">" +
            "import Block\(.ref) from \"@site/docs/reusable/\($posts[.ref]["wp:post_name"])/_index.md\"\n\n" +
            "<Block\(.ref)/>\n" +
            "</code></pre>"
          )
        )
      else . end
    else . end)' > "$tmpexport"
# Convert the cleaned-up export in $tmpexport into files under $tmpoutput.
# The converter path contains a slash, so it is resolved relative to the
# current working directory rather than looked up in PATH.
converter_args=(
  --wizard=false
  "--input=$tmpexport"
  "--output=$tmpoutput"
  --post-folders=true
  --include-other-types=true
  --save-attached-images=true
  --save-scraped-images=true
)
wordpress-export-to-markdown/index.js "${converter_args[@]}"
#######################################
# Apply a sed script in place to every file under a directory that contains
# a given literal string.
# Arguments:
#   $1 - literal string to look for (fixed-string match, not a regex)
#   $2 - sed script applied to each matching file
#   $3 - directory searched recursively
# Notes:
#   Matching file names are read NUL-delimited, so a path containing
#   whitespace or glob characters is still handled as one file.
#   Uses sponge to write the result back over the file being read.
#######################################
rewrite_matching_files() {
  local needle=$1 script=$2 root=$3 path
  while IFS= read -r -d '' path; do
    sed "$script" "$path" | sponge "$path"
  done < <(grep -rlF --null -- "$needle" "$root")
}

# MDX is really strict!
rewrite_matching_files '<br>' 's!<br>!<br/>!g' "$tmpoutput"
# Make links relative
rewrite_matching_files 'https://docs.snowplowanalytics.com/docs/' \
  's!https://docs\.snowplowanalytics\.com/docs/!/docs/!g' "$tmpoutput"
#######################################
# Prefix blocks with an underscore and remove front matters.
#
# For every sub-directory of the given directory that holds an index.md,
# write the file minus its first six lines to _index.md (the file name the
# generated MDX import statements point at) and delete the original.
# Arguments:
#   $1 - directory containing one sub-directory per block
# Notes:
#   Assumes the front matter is exactly the first six lines of index.md;
#   TODO confirm against the converter output.
#   Sub-directories without an index.md are skipped, and the original is
#   only removed once the stripped copy has been written.
#######################################
prefix_block_indexes() {
  local blocks_dir=$1 block_dir
  for block_dir in "$blocks_dir"/*/; do
    # Also covers the no-match case, where the pattern is left unexpanded.
    [[ -f "${block_dir}index.md" ]] || continue
    tail -n +7 "${block_dir}index.md" > "${block_dir}_index.md" \
      && rm "${block_dir}index.md"
  done
}

prefix_block_indexes "$tmpoutput/wp_block"
#######################################
# Replace a destination directory with the freshly converted docs.
# Arguments:
#   $1 - staging directory holding docs/ and, optionally, wp_block/
#   $2 - destination path that docs/ is moved to
# Returns:
#   non-zero, leaving the destination untouched, when the staging directory
#   has no docs/; otherwise the status of the first failing step, or 0.
# Notes:
#   wp_block/ is moved to <destination>/reusable, matching the
#   @site/docs/reusable/ paths used by the generated imports.
#######################################
publish_docs() {
  local staging=$1 dest=$2
  # Check before deleting, so a failed conversion does not wipe the
  # result of an earlier successful run.
  if [[ ! -d "$staging/docs" ]]; then
    printf 'error: %s/docs not found; leaving %s untouched\n' \
      "$staging" "$dest" >&2
    return 1
  fi
  rm -rf -- "$dest" || return
  mv -- "$staging/docs" "$dest" || return
  if [[ -d "$staging/wp_block" ]]; then
    mv -- "$staging/wp_block" "$dest/reusable" || return
  fi
}

# The intermediate export is no longer needed at this point.
rm -f -- "$tmpexport"
# Kept as the last command so that its status becomes the exit status.
publish_docs "$tmpoutput" /tmp/docs