forked from standardebooks/tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
build
executable file
·383 lines (312 loc) · 18 KB
/
build
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
#!/bin/bash
#Print the help text, reflowed by fmt, and end the script.
#The here-document delimiter is quoted because the text is purely literal.
#The bare exit passes on the status of fmt.
usage() {
	fmt <<'EOF'
DESCRIPTION
Build an ebook from a Standard Ebook source directory.
USAGE
build [-v,--verbose] [-k,--kindle] [-b,--kobo] [-c,--check] [-p,--proofreading-css] [-o,--output-dir=DIRECTORY] DIRECTORY [DIRECTORY...]
DIRECTORY is the source directory, which must contain DIRECTORY/src/.
Output is placed in the current working directory, unless a directory is specified with the --output-dir flag.
With the -k flag, use Calibre to create an .azw3 file in addition to epub files.
With the -b flag, create a Kobo-compatible .kepub.epub file in addition to epub files.
With the -c flag, use epubcheck to validate the epub. If -k is also specified and epubcheck fails, don't create a Kindle file.
With the -p flag, insert additional CSS rules that are helpful for proofreading. Output filenames will contain ".proof".
EOF
	exit
}
#Print an error message to stderr and exit with status 1.
#$1: the message. It is handed to printf as data (%s), never as the format string, so a "%" or "\" coming from a path or book title is printed literally instead of being interpreted.
die(){
	printf "Error: %s\n" "${1}" 1>&2
	exit 1
}
#Abort through die() unless a command can be found by the shell.
#$1: name of the command to look for. Quoted, so a name containing whitespace is looked up as one word rather than being split into several lookups.
#$2: optional hint (for example an install command) appended to the error message.
require(){
	local suggestion=""

	if ! command -v "$1" > /dev/null 2>&1; then
		if [ -n "$2" ]; then
			suggestion=" $2"
		fi

		die "$1 is not installed.${suggestion}"
	fi
}
#Show the help text when the only argument is a help flag.
#A case statement is used instead of `[ ... -o ... ]`: the -o operator of test is obsolescent and is parsed ambiguously when an operand looks like an operator (for example "!" or "(").
if [ $# -eq 1 ]; then
	case "$1" in
		-h|--help)
			usage
		;;
	esac
fi
#End boilerplate

#Check for dependencies that every build needs.
#Tools needed only for optional outputs are checked after the flags have been parsed.
require "xsltproc" "Try: apt-get install xsltproc"
require "xmllint" "Try: apt-get install libxml2-utils"
require "xmlstarlet" "Try: apt-get install xmlstarlet"
require "xpath" "Try: apt-get install libxml-xpath-perl"
require "mogrify" "Try: apt-get install imagemagick"
require "zip" "Try: apt-get install zip"
#These two are invoked unconditionally by the build loop as well: rsvg-convert for the SVG to PNG conversion, perl for renumbering the NCX nav points.
require "rsvg-convert" "Try: apt-get install librsvg2-bin"
require "perl" "Try: apt-get install perl"

#Without any argument there is nothing to build; print the help text instead.
if [ $# -eq 0 ]; then
	usage
fi
#Locations
#scriptDir is the directory this script lives in; helper scripts are resolved relative to it.
scriptDir="$(cd -P "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
curDir="$(pwd)"
srcDir="."
destDir="${curDir}" #Default output directory is the starting directory; no trailing slash
updateAsinPath="${scriptDir}/update-asin"
makeUrlSafePath="${scriptDir}/make-url-safe"

#Settings controlled by command line flags
kindle="false"
kobo="false"
verbose="false"
check="false"
proofreadingCss="false"
dirs="" #Newline-separated list of source directories

#Cover dimensions in pixels; the thumbnail is a quarter of the cover in each direction
svgWidth="1400"
svgHeight="2100"
thumbWidth=$(( svgWidth / 4 ))
thumbHeight=$(( svgHeight / 4 ))

#Some convenience aliases
#The \x.. forms are kept as literal escape text here and are only decoded where they are used inside a sed expression.
wj="\xE2\x81\xA0" #word joiner, U+2060
thinsp=" " #thin space, U+2009
nbsp=" " #no-break space, U+00A0
zwnbsp="\xEF\xBB\xBF" #zero-width no-break space, U+FEFF
shy="\xC2\xAD" #soft hyphen, U+00AD
#Parse the command line.
#Flags switch the matching global setting to "true"; -o=DIR / --output-dir=DIR sets destDir; every other argument is appended to dirs, the newline-separated list of source directories.
#Arguments: the script's own arguments.
#Globals written: kindle, kobo, check, verbose, proofreadingCss, destDir, dirs.
parseArgs(){
	while [ $# -gt 0 ]
	do
		case "$1" in
			-k|--kindle)
				kindle="true"
			;;
			-b|--kobo)
				kobo="true"
			;;
			-c|--check)
				check="true"
			;;
			-v|--verbose)
				verbose="true"
			;;
			-p|--proofreading-css)
				proofreadingCss="true"
			;;
			-o=*|--output-dir=*)
				#Keep everything after the first "=". Parameter expansion does this without spawning echo and sed, and later "=" characters in the path are left alone.
				destDir="${1#*=}"
			;;
			*)
				dirs=$(printf "%s\n%s" "${dirs}" "$1")
			;;
		esac
		shift
	done
}

parseArgs "$@"
#Consume the positional parameters at script level too, as the original inline loop did.
shift $#
#Tools for optional outputs are only required when that output was requested.
if [ "${check}" = "true" ]; then
	require "epubcheck" "Try: apt-get install epubcheck"
fi

if [ "${kindle}" = "true" ]; then
	require "ebook-convert" "Try: http://calibre-ebook.com/download"
	require "ebook-meta" "Try: http://calibre-ebook.com/download"
fi

#Create the output directory first and canonicalize it afterwards.
#realpath fails when an intermediate path component does not exist yet, which would leave destDir empty and make the build abort even though mkdir --parents is able to create the whole chain.
mkdir --parents "${destDir}" &> /dev/null
if [ ! -d "${destDir}" ]; then
	die "Couldn't create output directory."
fi

destDir="$(realpath "${destDir}")"
#Generate toc.ncx from the navigation document, for epub2 readers.
#Reads the globals scriptDir, workDir, epubDir and tocFilename, so it may only be called once those are set for the book being built.
#It is used twice: once for the compatible epub and once more after the ToC has been flattened for Kindle.
buildNcx(){
	local ncxPath="${workDir}/${epubDir}/toc.ncx"

	xsltproc --stringparam cwd "${workDir}/" "${scriptDir}/data/navdoc2ncx.xsl" "${workDir}/${epubDir}/${tocFilename}" > "${ncxPath}"
	sed --in-place --regexp-extended "s/ xml\:lang=\"\?\?\"//g" "${ncxPath}"

	#Make nicely incrementing navpoint IDs and playOrders
	sed --in-place "s/<navMap id=\".*\">/<navMap id=\"navmap\">/" "${ncxPath}"
	perl -pi -e 's/\<navPoint id\="idp[0-9]+"/"<navPoint id=\"navpoint-" . ++$n . "\""/ge' "${ncxPath}"
	perl -pi -e 's/\<navPoint/"<navPoint playOrder=\"" . ++$n . "\""/ge' "${ncxPath}"

	#Canonicalize, put the XML declaration back and pretty-print. The result is written to the same file inside ${epubDir} that was read.
	xmllint --c14n "${ncxPath}" | (printf "%s\n" "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" && cat) | xmllint --output "${ncxPath}" --format -
}

#Build every requested source directory in turn.
#For each one this writes a pure epub3 (.epub3) and a compatibility epub (.epub) plus cover images, and on request a Kobo and a Kindle file, into destDir.
#The loop is the receiving end of a pipeline and therefore runs in a subshell: die and exit inside it end that subshell, and because the pipeline is the last command of the script its status becomes the script's exit status.
#NOTE(review): the working paths under /tmp are predictable and are not removed afterwards; consider mktemp -d plus a cleanup trap once it is confirmed that no helper script relies on the directory name.
printf "%s\n" "${dirs}" | while IFS= read -r i;
do
	#Skip blank entries in the newline-separated list.
	if [ "${i}" = "" ]; then
		continue
	fi

	#Reset when starting the loop over, so relative source paths resolve from the starting directory.
	cd "${curDir}" || die "Couldn't change to ${curDir}."

	srcDir="$(realpath "${i%/}")"

	if [ ! -d "${srcDir}/src" ]; then
		die "${srcDir} doesn't look like a Standard Ebook source directory."
	fi

	#Remove the results of a previous build from the output directory.
	rm --force "${destDir}"/*.epub "${destDir}"/*.epub3 "${destDir}"/*.azw3 "${destDir}"/cover*.jpg "${destDir}"/thumbnail_*portrait.jpg

	#Get work title
	#We have to use xmlstarlet here because xpath chokes on utf8
	title="$(xmlstarlet sel -N dc="http://purl.org/dc/elements/1.1/" -t -m "//dc:title" -v "." -n "${srcDir}/src/epub/content.opf" | head -n 1)"
	author="$(xmlstarlet sel -N dc="http://purl.org/dc/elements/1.1/" -t -m "//dc:creator" -v "." -n "${srcDir}/src/epub/content.opf" | head -n 1)"
	urlTitle=$("${makeUrlSafePath}" "${title}")
	urlAuthor=$("${makeUrlSafePath}" "${author}")

	outputFilename="${urlAuthor}_${urlTitle}"
	workDir="/tmp/${outputFilename}.epub"

	#All output files share the same base name and differ only in their extension.
	epub2OutputFilename="${outputFilename}"
	kindleOutputFilename="${outputFilename}"
	koboOutputFilename="${outputFilename}"

	if [ "${proofreadingCss}" = "true" ]; then
		epub2OutputFilename="${epub2OutputFilename}.proof"
		kindleOutputFilename="${kindleOutputFilename}.proof"
		koboOutputFilename="${koboOutputFilename}.proof"
		outputFilename="${outputFilename}.proof"
	fi

	epub2OutputFilename="${epub2OutputFilename}.epub"
	kindleOutputFilename="${kindleOutputFilename}.azw3"
	koboOutputFilename="${koboOutputFilename}.kepub.epub"
	outputFilename="${outputFilename}.epub3"

	if [ "${verbose}" = "true" ]; then
		printf "\tBuilding %s ..." "${outputFilename}"
	fi

	#Set up our working directory in /tmp/.
	rm --force --recursive "${workDir}"
	mkdir "${workDir}" || die "Couldn't create working directory ${workDir}."
	cp --recursive --dereference "${srcDir}/src"/* "${workDir}"
	#The zip commands below use paths relative to the working directory.
	cd "${workDir}" || die "Couldn't change to ${workDir}."

	#Find the epub source directory.
	epubDir="$(xpath -e "string(//rootfile/@full-path)" "META-INF/container.xml" 2> /dev/null | sed "s/\/content.opf//")"

	#Are we including proofing CSS?
	if [ "${proofreadingCss}" = "true" ]; then
		cat "${scriptDir}/templates/proofreading.css" >> "${workDir}/${epubDir}/css/local.css"
	fi

	#Output a pure epub3 file.
	zip -9 --no-dir-entries -X --recurse-paths "${destDir}/${outputFilename}" mimetype META-INF "${epubDir}" > /dev/null 2>&1

	#Announce the file that is built next: the Kobo file if requested, otherwise the compatible epub.
	if [ "${kobo}" = "true" ]; then
		if [ "${verbose}" = "true" ]; then
			printf " OK\n"
			printf "\tBuilding %s ..." "${koboOutputFilename}"
		fi
	elif [ "${verbose}" = "true" ]; then
		printf " OK\n"
		printf "\tBuilding %s ..." "${epub2OutputFilename}"
	fi

	#Now add epub2 compatibility.

	#Set xmllint to use tab indentation.
	export XMLLINT_INDENT=$(printf "\t")

	#Include compatibility CSS
	cat "${scriptDir}/templates/compatibility.css" >> "${workDir}/${epubDir}/css/core.css"

	#Simplify tags
	"${scriptDir}/simplify-tags" "${workDir}"

	#To get popup footnotes in iBooks, we have to change epub:rearnote to epub:footnote.
	#Remember to get our custom style selectors too.
	find "${workDir}" -iname "*.xhtml" -print0 | xargs -0 sed --in-place --regexp-extended "s/epub:type=\"([^\"]*?)rearnote([^\"]*?)\"/epub:type=\"\1footnote\2\"/g"
	find "${workDir}" -iname "*.xhtml" -print0 | xargs -0 sed --in-place --regexp-extended "s/class=\"([^\"]*?)epub-type-rearnote([^\"]*?)\"/class=\"\1epub-type-footnote\2\"/g"
	find "${workDir}" -iname "*.css" -print0 | xargs -0 sed --in-place --regexp-extended "s/rearnote/footnote/g"

	#Include extra lang tag for accessibility compatibility.
	find "${workDir}" -iname "*.xhtml" -exec sed --in-place --regexp-extended "s/xml\:lang\=\"([^\"]+?)\"/lang=\"\1\" xml:lang=\"\1\"/g" "{}" \;

	#Typography: replace double and triple em dash characters with extra em dashes.
	find "${workDir}" -iname "*.xhtml" -exec sed --in-place --regexp-extended "s/⸺/——/g" "{}" \;
	find "${workDir}" -iname "*.xhtml" -exec sed --in-place --regexp-extended "s/⸻/———/g" "{}" \;

	#Typography: replace some other less common characters.
	find "${workDir}" -iname "*.xhtml" -exec sed --in-place --regexp-extended "s/⅒/1\/10/g" "{}" \;
	find "${workDir}" -iname "*.xhtml" -exec sed --in-place --regexp-extended "s/℅/c\/o/g" "{}" \;

	#Many e-readers don't support the word joiner character (U+2060, bytes 0xE2 0x81 0xA0).
	#They DO, however, support the now-deprecated zero-width non-breaking space (U+FEFF, bytes 0xEF 0xBB 0xBF).
	#For epubs, do this replacement. Kindle now seems to handle everything fortunately.
	find "${workDir}" -iname "*.xhtml" -exec sed --in-place --regexp-extended "s/${wj}/${zwnbsp}/ig" "{}" \;

	#Convert the svg cover to a jpg image.
	#Mogrify reports the svg as the wrong size, so we have to force the size.
	#We also generate a thumbnail for the OPDS feed all in one go. Note that we can't use percentages to resize, since mogrify auto-detects the wrong svg size to begin with.
	#The cover is looked up inside ${epubDir}, like every other file of the book.
	mogrify -resize "${svgWidth}x${svgHeight}" -format jpg "${workDir}/${epubDir}/images/cover.svg"
	cp "${workDir}/${epubDir}/images/cover.jpg" "${destDir}/cover.jpg"
	cp "${workDir}/${epubDir}/images/cover.svg" "${destDir}/cover-thumbnail.svg"
	mogrify -resize "${thumbWidth}x${thumbHeight}" -quality 100 -format jpg "${destDir}/cover-thumbnail.svg"
	rm "${workDir}/${epubDir}/images/cover.svg"
	rm "${destDir}/cover-thumbnail.svg"

	#Convert every remaining svg to png. The svg is deleted only if its conversion succeeded, because find evaluates the second -exec only when the first one returned true.
	find "${workDir}" -iname "*.svg" -exec sh -c 'rsvg-convert -z 2 -a -f png -o "${0%.svg}.png" "$0"' "{}" \; -exec rm "{}" \;

	#Replace references to cover.svg with cover.jpg. Image files are skipped, because running sed over binary data could corrupt them.
	find "${workDir}" -type f \( ! -iname "*.jpg" -a ! -iname "*.png" \) -print0 | xargs -0 sed --in-place "s/cover\.svg/cover.jpg/g"
	#Fix the cover's mime type declaration in content.opf
	sed --in-place --regexp-extended "s/id=\"cover.jpg\" media\-type=\"image\/svg\+xml\"/id=\"cover.jpg\" media\-type=\"image\/jpeg\"/g" "${workDir}/${epubDir}/content.opf"
	#Replace all other references to .svg with .png, again skipping image files.
	find "${workDir}" -type f \( ! -iname "*.png" -a ! -iname "*.jpg" \) -print0 | xargs -0 sed --in-place "s/\.svg/.png/g"
	#Replace the remaining svg mime type declarations in content.opf
	sed --in-place --regexp-extended "s/image\/svg\+xml/image\/png/g" "${workDir}/${epubDir}/content.opf"
	#We have to remove these references to satisfy epubcheck.
	sed --in-place "s/properties=\"svg\"//g" "${workDir}/${epubDir}/content.opf"

	#At this point we can build the Kobo epub
	if [ "${kobo}" = "true" ]; then
		cp -r "${workDir}" "${workDir}.kobo"
		cd "${workDir}.kobo" || die "Couldn't change to ${workDir}.kobo."

		"${scriptDir}/build-kobo" "${workDir}.kobo"

		zip -9 --no-dir-entries -X --recurse-paths "${destDir}/${koboOutputFilename}" mimetype META-INF "${epubDir}" > /dev/null 2>&1

		rm -rf "${workDir}.kobo"
		cd "${workDir}" || die "Couldn't change to ${workDir}."

		if [ "${verbose}" = "true" ]; then
			printf " OK\n"
			printf "\tBuilding %s ..." "${epub2OutputFilename}"
		fi
	fi

	#Include epub2 cover metadata
	coverId="$(xpath -e "string(//item[@properties=\"cover-image\"]/@id)" "${workDir}/${epubDir}/content.opf" 2> /dev/null)"
	sed --in-place --regexp-extended "s/(<metadata.*)/\1<meta content=\"${coverId}\" name=\"cover\" \/>/g" "${workDir}/${epubDir}/content.opf"

	#Add metadata to content.opf indicating this file is a Standard Ebooks compatibility build
	sed --in-place --regexp-extended "s/<dc:publisher/<meta property=\"se:transform\">compatibility<\/meta>\n\t\t<dc:publisher/g" "${workDir}/${epubDir}/content.opf"

	#Generate our NCX file for epub2 compatibility.
	#First find the ToC file.
	tocFilename="$(xpath -e "string(//item[@properties=\"nav\"]/@href)" "${workDir}/${epubDir}/content.opf" 2> /dev/null)"
	sed --in-place "s/<spine>/<spine toc=\"ncx\">/g" "${workDir}/${epubDir}/content.opf"
	sed --in-place "s/<manifest>/<manifest><item href=\"toc.ncx\" id=\"ncx\" media-type=\"application\/x-dtbncx+xml\" \/>/g" "${workDir}/${epubDir}/content.opf"
	buildNcx

	#Convert the guide
	#We add the 'text' attribute to the titlepage to tell the reader to start there
	#The resulting <guide> element is appended to content.opf; the closing </package> tag is moved behind it right afterwards.
	xpath -q -e "//nav[@epub:type=\"landmarks\"]/ol/li/a" "${workDir}/${epubDir}/$tocFilename" \
		| sed --regexp-extended "s/epub:type=\"([^\"]*)(\s*frontmatter\s*|\s*backmatter\s*)([^\"]*)\"/type=\"\1\3\"/g" \
		| sed --regexp-extended "s/epub:type=\"[^\"]*(acknowledgements|bibliography|colophon|copyright-page|cover|dedication|epigraph|foreword|glossary|index|loi|lot|notes|preface|bodymatter|titlepage|toc)[^\"]*\"/type=\"\1\"/g" \
		| sed "s/type=\"copyright\-page/type=\"copyright page/g" \
		| sed "s/type=\"titlepage/type=\"title-page text/g" \
		| sed "s/type=\"appendix/type=\"/g" \
		| sed "/type=\"\s*\"/d" \
		| sed "s/<a/<reference/g" \
		| sed --regexp-extended "s/>(.+)<\/a>/ title=\"\1\" \/>/g" \
		| (printf "%s\n" "<guide>" && cat) \
		| (cat && printf "%s\n" "</guide>") >> "${workDir}/${epubDir}/content.opf"
	sed --in-place "s/<\/package>//g" "${workDir}/${epubDir}/content.opf"
	printf "%s\n" "</package>" >> "${workDir}/${epubDir}/content.opf"
	xmllint --c14n "${workDir}/${epubDir}/content.opf" | (printf "%s\n" "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" && cat) | xmllint --output "${workDir}/${epubDir}/content.opf" --format -

	#Add some compatibility CSS rules
	find "${workDir}" -iname "*.css" -print0 | xargs -0 sed --in-place --regexp-extended "s/(page\-break\-(before|after|inside)\s*\:\s*(.+))/\1\n\t-webkit-column-break-\2: \3 \/* For Readium *\//g"
	find "${workDir}" -iname "*.css" -print0 | xargs -0 sed --in-place --regexp-extended "s/^\s*hyphens\s*\:\s*(.+)/\thyphens: \1\n\tadobe-hyphenate: \1\n\t-webkit-hyphens: \1\n\t-epub-hyphens: \1\n\t-moz-hyphens: \1/g"
	find "${workDir}" -iname "*.css" -print0 | xargs -0 sed --in-place --regexp-extended "s/^\s*hyphens\s*\:\s*none;/\thyphens: none;\n\tadobe-text-layout: optimizeSpeed; \/* For Nook *\//g"

	#Add soft hyphens
	"${scriptDir}/hyphenate" --ignore-h-tags "${workDir}"

	#Hyphenate screws up our nice XHTML formatting so clean it up for distribution
	"${scriptDir}/clean" "${workDir}/"

	#Create the compatible epub file
	zip -9 --no-dir-entries -X --recurse-paths "${destDir}/${epub2OutputFilename}" mimetype META-INF "${epubDir}" > /dev/null 2>&1

	if [ "${verbose}" = "true" ]; then
		printf " OK\n"
	fi

	if [ "${check}" = "true" ]; then
		if [ "${verbose}" = "true" ]; then
			printf "\tRunning epubcheck ..."
		fi

		#The path is quoted so that an output directory containing spaces reaches epubcheck as a single argument.
		#On failure the report is printed and the build stops, so no Kindle file is created from an invalid epub.
		if ! output="$(epubcheck "${destDir}/${epub2OutputFilename}" 2>&1)"; then
			printf "%s\n" "${output}"
			exit 1
		fi

		if [ "${verbose}" = "true" ]; then
			printf " OK\n"
		fi
	fi

	if [ "${kindle}" = "true" ]; then
		if [ "${verbose}" = "true" ]; then
			printf "\tBuilding %s ..." "${kindleOutputFilename}"
		fi

		epubSource="/tmp/${epub2OutputFilename}.tmp.epub"

		#Kindle doesn't go more than 2 levels deep for ToC, so flatten it here and regenerate the NCX from the flattened ToC.
		"${scriptDir}/toc2kindle" "${workDir}/${epubDir}/${tocFilename}"
		"${scriptDir}/clean" "${workDir}/${epubDir}/${tocFilename}"
		buildNcx

		#Kindle doesn't recognize most zero-width spaces or word joiners, so just remove them.
		#It does recognize the word joiner character, but only in the old mobi7 format. The new format renders them as spaces.
		find "${workDir}" -iname "*.xhtml" -exec sed --in-place --regexp-extended "s/${zwnbsp}//ig" "{}" \;

		#Append our kindle compatibility CSS file to the core CSS file.
		cat "${scriptDir}/templates/kindle.css" >> "${workDir}/${epubDir}/css/core.css"

		#Convert endnotes to Kindle popup compatible notes
		if [ -f "${workDir}/${epubDir}/text/endnotes.xhtml" ]; then
			"${scriptDir}/endnotes2kindle" "${workDir}/${epubDir}/text/endnotes.xhtml"

			#While Kindle now supports soft hyphens, popup endnotes break words but don't insert the hyphen characters. So for now, remove soft hyphens from the endnotes file.
			sed --in-place "s/${shy}//g" "${workDir}/${epubDir}/text/endnotes.xhtml"
		fi

		#Remove the epub:type attribute, as Calibre turns it into just "type"
		find "${workDir}" -iname "*.xhtml" -print0 | xargs -0 sed --in-place --regexp-extended "s/epub:type=\"[^\"]*?\"//g"

		#Re-create the compatible epub file as the input for Calibre
		zip -9 --no-dir-entries -X --recurse-paths "${epubSource}" mimetype META-INF "${epubDir}" > /dev/null 2>&1

		#Generate the kindle file
		coverPath="$(xpath -e "string(//item[@properties=\"cover-image\"]/@href)" "${workDir}/${epubDir}/content.opf" 2> /dev/null)"
		#Previous invocation, kept for reference:
		#ebook-convert "${epubSource}" "${destDir}/${kindleOutputFilename}" --mobi-file-type="both" --pretty-print --no-inline-toc --max-toc-links=0 --prefer-metadata-cover --cover="${workDir}/${epubDir}/${coverPath}" > /dev/null 2>&1
		if ebook-convert "${epubSource}" "${destDir}/${kindleOutputFilename}" --pretty-print --no-inline-toc --max-toc-links=0 --prefer-metadata-cover --cover="${workDir}/${epubDir}/${coverPath}" > /dev/null 2>&1; then
			#Get the ASIN for the thumbnail
			#The ASIN is set to the SHA-1 sum of the book's identifying URL.
			bookId=$(grep --only-matching --extended-regexp "<dc:identifier id=\"uid\">url:[^<]+</dc:identifier>" "${workDir}/${epubDir}/content.opf" | sed --regexp-extended "s/<[^>]+>//g" | sed "s/^url://")
			asin=$(printf "%s" "${bookId}" | sha1sum | cut -d " " -f 1)

			#Update the ASIN in the generated file
			"${updateAsinPath}" "${asin}" "${destDir}/${kindleOutputFilename}" "${workDir}/${kindleOutputFilename}" > /dev/null
			mv "${workDir}/${kindleOutputFilename}" "${destDir}/${kindleOutputFilename}"

			#Extract the thumbnail, named after the ASIN as read back from the finished file
			asin="$(ebook-meta "${destDir}/${kindleOutputFilename}" | grep --only-matching --extended-regexp "mobi\-asin:.+" | cut -c11-)"
			convert "${workDir}/${epubDir}/${coverPath}" -resize 432x660 "${destDir}/thumbnail_${asin}_EBOK_portrait.jpg" > /dev/null 2>&1

			if [ "${verbose}" = "true" ]; then
				printf " OK\n"
			fi
		fi
	fi
done