forked from standardebooks/tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
clean
executable file
·113 lines (91 loc) · 3.74 KB
/
clean
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/bin/bash
#Show the help text and leave the script.
#The text is passed through fmt for re-wrapping; the bare exit reuses fmt's exit status.
usage() {
	fmt <<'EOF'
DESCRIPTION
Prettify individual source files, or all XHTML and SVG files in a source directory, including canonicalizing XML and minifying SVGs. Note that this only prettifies the source code; it doesn't perform typography changes.
USAGE
clean [-v,--verbose] [-s,--single-lines] DIRECTORY,FILE [DIRECTORY,FILE...]
DIRECTORY is a directory containing one or more XHTML or SVG files.
FILE is an XHTML or SVG file.
Multiple directories and files can be included in a single invocation.
With --single-lines, remove hard line wrapping.
EOF
	exit
}
#Print an error message to stderr and abort the script with status 1.
#  $1 - message text, printed verbatim after "Error: "
die(){
	#The message is passed as an argument to a fixed format, never as the format itself, so a "%" in it is printed literally.
	printf 'Error: %s\n' "$1" 1>&2
	exit 1
}
#Abort via die unless the shell can resolve a command (checked with `command -v`).
#  $1 - name of the command to look up
#  $2 - optional hint appended to the error message, e.g. how to install the command
require(){
	local suggestion=""

	#Quoted so that a name containing spaces or glob characters is looked up as one word.
	if command -v "$1" > /dev/null 2>&1; then
		return 0
	fi

	if [ -n "$2" ]; then
		suggestion=" $2"
	fi

	die "$1 is not installed.${suggestion}"
}
#Show the help text when the only argument is --help or -h.
if [ $# -eq 1 ] && { [ "$1" = "--help" ] || [ "$1" = "-h" ]; }; then
	usage
fi
#End boilerplate

#Check for dependencies
require "recode" "Try: sudo apt-get install recode"
require "xml2asc" "Try: sudo apt-get install html-xml-utils"
#xmllint and realpath are invoked for every path that is cleaned, so fail early if either is missing.
require "xmllint" "Try: sudo apt-get install libxml2-utils"
require "realpath" "Try: sudo apt-get install coreutils"

#Without any arguments there is nothing to clean; show the help text instead.
if [ $# -eq 0 ]; then
	usage
fi
#Option defaults. Both flags hold the strings "true" or "false".
singleLines="false"
verbose="false"
#Newline-separated list of every argument that is not a recognised flag.
dirs=""

#Consume the arguments one at a time until none are left.
until [ $# -eq 0 ]; do
	case "$1" in
		-s|--single-lines) singleLines="true" ;;
		-v|--verbose) verbose="true" ;;
		*) dirs=$(printf "%s\n%s" "${dirs}" "$1") ;;
	esac
	shift
done
#Set xmllint to use tab indentation.
export XMLLINT_INDENT=$'\t'

#Print a non-fatal warning to stderr.
#  $1 - message text
warn(){
	printf 'Warning: %s\n' "$1" 1>&2
}

#Remove hard line wrapping from one file, in place: every newline becomes a space,
#then each run of whitespace is collapsed to a single space.
#  $1 - file to rewrite
unwrap_lines(){
	perl -i -pe 's/\n/ /' "$1"
	sed --in-place --regexp-extended 's|\s+| |g' "$1"
}

#Epub3 doesn't allow named entities, so convert them to their unicode equivalents.
#First xml2asc converts *all* special characters to entities, then recode converts entities to unicode.
#  $1 - file to rewrite
#Returns 1, leaving the file untouched, when no temporary file can be made or the conversion fails.
convert_entities(){
	local target="$1"
	local scratch
	local result=0

	if ! scratch="$(mktemp)"; then
		warn "Could not create a temporary file; leaving ${target} unconverted."
		return 1
	fi

	#pipefail is enabled inside a subshell only, so a failing xml2asc or recode is noticed without changing the option for the rest of the script.
	#NOTE(review): the sed replacement used to be a bare "&", which sed reads as "the matched text" and so changed nothing.
	#Re-escaping ampersands as "&amp;" is presumably what was intended - confirm against the output of the installed recode.
	if (set -o pipefail; xml2asc < "${target}" | recode HTML | sed 's/&/\&amp;/g' > "${scratch}"); then
		#Copy the contents back instead of moving the scratch file, so the target keeps its own permissions.
		cat "${scratch}" > "${target}"
	else
		warn "Entity conversion failed; leaving ${target} unconverted."
		result=1
	fi

	rm -f "${scratch}"
	return "${result}"
}

#Canonicalize and pretty-print one XML file in place with xmllint.
#The --c14n flag removes the XML declaration, so we re-add it before formatting. Otherwise we get problems.
#  $1 - file to rewrite
format_xml(){
	xmllint --c14n "$1" | (printf '%s\n' '<?xml version="1.0" encoding="UTF-8"?>' && cat) | xmllint --output "$1" --format -
}

#Add a trailing newline to a file that doesn't have one.
#  $1 - file to rewrite
ensure_trailing_newline(){
	sed --in-place -e '$a\' "$1"
}

#Call a function once for every regular file under a directory that matches the given find tests.
#  $1 - name of the function to call; it receives the file path as its only argument
#  $2 - directory to search
#  remaining arguments - tests handed to find unchanged
for_each_file(){
	local action="$1"
	local root="$2"
	local found
	shift 2

	#Paths arrive NUL-delimited on descriptor 3: any file name survives intact, nothing is re-parsed by a shell, and stdin stays free.
	while IFS= read -r -d '' -u 3 found; do
		"${action}" "${found}"
	done 3< <(find "${root}" "$@" -type f -print0)
}

#Clean a single file.
#  $1 - file to clean
clean_file(){
	local target="$1"

	if [ "${singleLines}" = "true" ]; then
		unwrap_lines "${target}"
	fi

	convert_entities "${target}"
	format_xml "${target}"
	ensure_trailing_newline "${target}"
}

#Clean every matching file below a directory.
#  $1 - directory to clean
clean_directory(){
	local root="$1"
	local colophon="${root}/src/epub/text/colophon.xhtml"
	local saved=""

	if [ "${singleLines}" = "true" ]; then
		#The colophon is exempt from unwrapping: a copy is taken first and put back afterwards.
		if [ -f "${colophon}" ] && ! saved="$(mktemp)"; then
			warn "Could not create a temporary file; not unwrapping lines in ${root}."
		else
			if [ -n "${saved}" ]; then
				cp "${colophon}" "${saved}"
			fi

			for_each_file unwrap_lines "${root}" -iname "*.xhtml"

			if [ -n "${saved}" ]; then
				cp "${saved}" "${colophon}"
				rm -f "${saved}"
			fi
		fi
	fi

	for_each_file convert_entities "${root}" -iname "*.xhtml"

	#Clean files with xmllint.
	for_each_file format_xml "${root}" \( -iname "*.xhtml" -o -iname "*.svg" -o -iname "*.opf" -o -iname "*.xml" \)

	#Add trailing newlines to files that don't have one.
	for_each_file ensure_trailing_newline "${root}" \( -iname "*.xhtml" -o -iname "*.svg" -o -iname "*.css" -o -iname "*.opf" -o -iname "*.xml" \)
}

#Clean one path given on the command line, which may be a directory or a file.
#  $1 - path as typed by the user
#Reads the globals verbose and singleLines.
#Returns 1 without touching anything when the path cannot be resolved or does not exist, 0 otherwise.
clean_path(){
	local requested="$1"
	local target

	if ! target="$(realpath -- "${requested%/}")"; then
		warn "Could not resolve ${requested}; skipping."
		return 1
	fi

	#Checked before any tool runs: redirecting output to a missing path would otherwise create it as an empty file.
	if [ ! -e "${target}" ]; then
		warn "${requested} does not exist; skipping."
		return 1
	fi

	if [ "${verbose}" = "true" ]; then
		printf "Cleaning %s ..." "${target}"
	fi

	if [ -d "${target}" ]; then
		clean_directory "${target}"
	else
		clean_file "${target}"
	fi

	if [ "${verbose}" = "true" ]; then
		printf "%s\n" " OK"
	fi

	return 0
}

#dirs holds one path per line; blank lines are ignored.
skipped=0
while IFS= read -r i; do
	if [ "${i}" = "" ]; then
		continue
	fi

	clean_path "${i}" || skipped=1
done <<< "${dirs}"

#As the final command, this makes the script's exit status non-zero when any path was skipped.
[ "${skipped}" -eq 0 ]