build

#!/bin/bash
#Print the help text, re-flowed by fmt, and end the script.
#No status is passed to exit, so the script exits with whatever fmt returned.
usage() {
	#The delimiter is quoted because the text is purely literal; nothing in it needs expanding.
	fmt <<'EOF'
DESCRIPTION
	Build an ebook from a Standard Ebook source directory.

USAGE
	build [-v,--verbose] [-k,--kindle] [-b,--kobo] [-c,--check] [-p,--proofreading-css] [-o,--output-dir=DIRECTORY] DIRECTORY [DIRECTORY...]
		DIRECTORY is the source directory, which must contain DIRECTORY/src/.

		Output is placed in the current working directory, unless a directory is specified with the --output-dir flag.

		With the -k flag, use Calibre to create an .azw3 file in addition to epub files.
		With the -b flag, create a Kobo-compatible .kepub.epub file in addition to epub files.
		With the -c flag, use epubcheck to validate the epub.  If -k is also specified and epubcheck fails, don't create a Kindle file.
		With the -p flag, insert additional CSS rules that are helpful for proofreading.  Output filenames will contain ".proof".
EOF
	exit
}
#Print "Error: MESSAGE" on stderr and end the script with status 1.
#$1: the message.  It is passed as an argument to a fixed format string, so "%" signs and backslashes in it (e.g. from a path) are printed literally.
die(){ printf "Error: %s\n" "${1}" 1>&2; exit 1; }
#Make sure a command is available; otherwise stop the script through die.
#$1: the command name.  It is quoted, so it is looked up as one name rather than being split into several words.
#$2: optional installation hint, appended to the error message after a space.
require(){
	local suggestion=""

	if command -v "$1" > /dev/null 2>&1; then
		return 0
	fi

	if [ -n "$2" ]; then
		suggestion=" $2"
	fi

	die "$1 is not installed.${suggestion}"
}
#Show the help text when the only argument is a help flag.
if [ $# -eq 1 ]; then
	case "$1" in
		--help|-h)
			usage
		;;
	esac
fi
#End boilerplate

#Check for dependencies
#Every tool the build always runs is checked up front, so that a missing one stops the script before any file is touched.
require "xsltproc" "Try: apt-get install xsltproc"
require "xmllint" "Try: apt-get install libxml2-utils"
require "xmlstarlet" "Try: apt-get install xmlstarlet"
require "xpath" "Try: apt-get install libxml-xpath-perl"
require "mogrify" "Try: apt-get install imagemagick"
#rsvg-convert turns the svg images into png files.  Without it the svg files are kept, but the references to them are still rewritten to .png.
require "rsvg-convert" "Try: apt-get install librsvg2-bin"
require "zip" "Try: apt-get install zip"

#Nothing to build without arguments: show the help text instead.
[ $# -ne 0 ] || usage

#Where we are: the directory containing this script (symlinked directories resolved by cd -P) and the directory the script was started from.
scriptDir="$(cd -P "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
curDir="$(pwd)"

#Helper scripts that live next to this one
updateAsinPath="${scriptDir}/update-asin"
makeUrlSafePath="${scriptDir}/make-url-safe"

#Option defaults; the command line parser overrides these
kindle="false"
kobo="false"
verbose="false"
check="false"
proofreadingCss="false"
dirs=""
srcDir="."
destDir="$(pwd)" #no trailing slash

#Cover dimensions in pixels; the thumbnail is a quarter of the cover in each direction
svgWidth="1400"
svgHeight="2100"
thumbWidth=$((svgWidth / 4))
thumbHeight=$((svgHeight / 4))

#Some convenience aliases
#wj, zwnbsp and shy hold backslash escape sequences rather than raw bytes.  They are interpolated into sed expressions further down, where (GNU) sed turns each \xHH into the byte it names.
#thinsp and nbsp hold the characters themselves.  They are spelled out with ANSI-C quoting because a literal thin or no-break space is indistinguishable from a regular space in an editor and is easily lost.
wj="\xE2\x81\xA0" #word joiner, U+2060
thinsp=$'\xE2\x80\x89' #thin space, U+2009
nbsp=$'\xC2\xA0' #no-break space, U+00A0
zwnbsp="\xEF\xBB\xBF" #zero-width no-break space, U+FEFF
shy="\xC2\xAD" #soft hyphen, U+00AD

#Interpret the command line.
#Flags switch the matching option variable to "true", -o=DIR/--output-dir=DIR sets destDir, and -h/--help shows the help text wherever it appears.
#Every other argument is taken to be a source directory and appended to dirs, a newline-separated list (which therefore begins with an empty entry).
#Arguments: the script's positional parameters.
parseArguments(){
	while [ $# -gt 0 ]
	do
		case "$1" in
			-h|--help)
				usage
			;;
			-k|--kindle)
				kindle="true"
			;;
			-b|--kobo)
				kobo="true"
			;;
			-c|--check)
				check="true"
			;;
			-v|--verbose)
				verbose="true"
			;;
			-p|--proofreading-css)
				proofreadingCss="true"
			;;
			-o=*|--output-dir=*)
				#The directory is everything after the first "="
				destDir="${1#*=}"
			;;
			*)
				dirs=$(printf "%s\n%s" "${dirs}" "$1")
			;;
		esac
		shift
	done
}

parseArguments "$@"
shift $# #All arguments have been consumed

#Tools that are only needed for optional features are only demanded when the feature was requested.
[ "${check}" != "true" ] || require "epubcheck" "Try: apt-get install epubcheck"

if [ "${kindle}" = "true" ]; then
	for calibreTool in "ebook-convert" "ebook-meta"
	do
		require "${calibreTool}" "Try: http://calibre-ebook.com/download"
	done
fi

#Create the output directory, including any missing parents, and print its absolute path.
#The directory is created before it is resolved: realpath fails when intermediate path components don't exist yet, which would make a nested, not yet existing output directory unusable.
#$1: the requested output directory.
#Returns non-zero if the directory can't be created or resolved.
prepareOutputDir(){
	mkdir --parents -- "$1" > /dev/null 2>&1

	if [ ! -d "$1" ]; then
		return 1
	fi

	realpath -- "$1"
}

destDir="$(prepareOutputDir "${destDir}")" || die "Couldn't create output directory."

#Previous build products are cleared from the output directory once, before the first book is built.
destDirCleaned="false"

#Build each requested source directory in turn.  dirs is a newline-separated list; blank entries are skipped.
#The loop reads from a pipe and therefore runs in a subshell, so die and exit inside it end that subshell.
printf "%s\n" "${dirs}" | while IFS= read -r i;
do
	if [ "${i}" = "" ]; then
		continue
	fi

	#Reset when starting the loop over, so that relative DIRECTORY arguments are resolved from where the script was started
	cd "${curDir}" || die "Couldn't change to ${curDir}."

	srcDir="$(realpath "${i%/}")"

	if [ ! -d "${srcDir}/src" ]; then
		die "${srcDir} doesn't look like a Standard Ebook source directory."
	fi

	#Remove the products of earlier builds, but only ahead of the first book: doing so for every book would delete the files just built for the previous DIRECTORY.
	if [ "${destDirCleaned}" = "false" ]; then
		rm --force "${destDir}"/*.epub "${destDir}"/*.epub3 "${destDir}"/*.azw3 "${destDir}"/cover*.jpg "${destDir}"/thumbnail_*portrait.jpg
		destDirCleaned="true"
	fi

	#Get the work's title and author: the first dc:title and dc:creator in the package metadata.
	#We have to use xmlstarlet here because xpath chokes on utf8
	metadataPath="${srcDir}/src/epub/content.opf"
	title="$(xmlstarlet sel -N dc="http://purl.org/dc/elements/1.1/" -t -m "//dc:title" -v "." -n "${metadataPath}" | head -n 1)"
	author="$(xmlstarlet sel -N dc="http://purl.org/dc/elements/1.1/" -t -m "//dc:creator" -v "." -n "${metadataPath}" | head -n 1)"

	#All output files share the stem AUTHOR_TITLE, made from the URL-safe forms of both.
	urlTitle=$("${makeUrlSafePath}" "${title}")
	urlAuthor=$("${makeUrlSafePath}" "${author}")
	fileStem="${urlAuthor}_${urlTitle}"

	#The working copy of the book; its name never carries the proofreading marker.
	workDir="/tmp/${fileStem}.epub"

	#Proofreading builds get ".proof" in front of the extension.
	proofMarker=""
	if [ "${proofreadingCss}" = "true" ]; then
		proofMarker=".proof"
	fi

	outputFilename="${fileStem}${proofMarker}.epub3"
	epub2OutputFilename="${fileStem}${proofMarker}.epub"
	kindleOutputFilename="${fileStem}${proofMarker}.azw3"
	koboOutputFilename="${fileStem}${proofMarker}.kepub.epub"

	#The progress line is left open; " OK" is appended once the file has been built.
	[ "${verbose}" != "true" ] || printf "\tBuilding %s ..." "${outputFilename}"

	#Set up our working directory in /tmp/: start from scratch with a copy of the source in which symlinks are replaced by what they point to.
	#Everything that follows works relative to this directory, so failing to create or enter it ends the build instead of letting later commands run somewhere else.
	rm --force --recursive "${workDir}"
	mkdir "${workDir}" || die "Couldn't create working directory ${workDir}."
	cp --recursive --dereference "${srcDir}/src"/* "${workDir}"
	cd "${workDir}" || die "Couldn't change to working directory ${workDir}."

	#Find the epub source directory: the rootfile path from container.xml, minus the content.opf filename.
	epubDir="$(xpath -e "string(//rootfile/@full-path)" "META-INF/container.xml" 2> /dev/null | sed "s/\/content.opf//")"

	#Are we including proofing CSS?
	if [ "${proofreadingCss}" = "true" ]; then
		cat "${scriptDir}/templates/proofreading.css" >> "${workDir}/${epubDir}/css/local.css"
	fi

	#Output a pure epub3 file.
	zip -9 --no-dir-entries -X --recurse-paths "${destDir}/${outputFilename}" mimetype META-INF "${epubDir}" > /dev/null 2>&1


	#Close the epub3 progress line and announce what is built next: the Kobo file if one was asked for, otherwise the compatible epub.
	if [ "${verbose}" = "true" ]; then
		nextFilename="${epub2OutputFilename}"
		if [ "${kobo}" = "true" ]; then
			nextFilename="${koboOutputFilename}"
		fi

		printf " OK\n\tBuilding %s ..." "${nextFilename}"
	fi

	#Now add epub2 compatibility.
	#From here on the working copy is modified in place; the pure epub3 file has already been written.
	#Set xmllint to use tab indentation.
	export XMLLINT_INDENT=$(printf "\t")

	#Include compatibility CSS
	cat "${scriptDir}/templates/compatibility.css" >> "${workDir}/${epubDir}/css/core.css"

	#Simplify tags
	"${scriptDir}/simplify-tags" "${workDir}"

	#To get popup footnotes in iBooks, we have to change epub:rearnote to epub:footnote.
	#Remember to get our custom style selectors too.
	#The first two commands rewrite the epub:type and class attributes of the XHTML files; the third replaces every "rearnote" in the CSS files.
	find "${workDir}" -iname "*.xhtml" -print0 | xargs -0 sed --in-place --regexp-extended "s/epub:type=\"([^\"]*?)rearnote([^\"]*?)\"/epub:type=\"\1footnote\2\"/g"
	find "${workDir}" -iname "*.xhtml" -print0 | xargs -0 sed --in-place --regexp-extended "s/class=\"([^\"]*?)epub-type-rearnote([^\"]*?)\"/class=\"\1epub-type-footnote\2\"/g"
	find "${workDir}" -iname "*.css" -print0 | xargs -0 sed --in-place --regexp-extended "s/rearnote/footnote/g"

	#Include extra lang tag for accessibility compatibility.
	#Every xml:lang attribute gets a lang attribute with the same value placed in front of it.
	find "${workDir}" -iname "*.xhtml" -exec sed --in-place --regexp-extended "s/xml\:lang\=\"([^\"]+?)\"/lang=\"\1\" xml:lang=\"\1\"/g" "{}" \;

	#Typography: replace double and triple em dash characters with extra em dashes.
	find "${workDir}" -iname "*.xhtml" -exec sed --in-place --regexp-extended "s/⸺/——/g" "{}" \;
	find "${workDir}" -iname "*.xhtml" -exec sed --in-place --regexp-extended "s/⸻/———/g" "{}" \;

	#Typography: replace some other less common characters.
	find "${workDir}" -iname "*.xhtml" -exec sed --in-place --regexp-extended "s/⅒/1\/10/g" "{}" \;
	find "${workDir}" -iname "*.xhtml" -exec sed --in-place --regexp-extended "s/℅/c\/o/g" "{}" \;

	#Many e-readers don't support the word joiner character (U+2060 aka &#8288; aka 0xE2 0x81 0xA0).
	#They DO, however, support the now-deprecated zero-width non-breaking space, (U+FEFF aka &#65279; aka 0xEF 0xBB 0xBF)
	#For epubs, do this replacement.  Kindle now seems to handle everything fortunately.
	#${wj} and ${zwnbsp} contain \xHH escape sequences, not the characters themselves; it is sed that turns them into bytes.
	find "${workDir}" -iname "*.xhtml" -exec sed --in-place --regexp-extended "s/${wj}/${zwnbsp}/ig" "{}" \;

	#Convert svg images to raster images: the cover becomes a jpg, every other svg a png.
	#Mogrify reports the svg as the wrong size, so we have to force the size.
	#We also generate a thumbnail for the OPDS feed all in one go.  Note that we can't use percentages to resize, since mogrify auto-detects the wrong svg size to begin with.
	#The images directory is found through ${epubDir}, like every other path into the book, instead of assuming that the content directory is called "epub".
	imagesDir="${workDir}/${epubDir}/images"
	mogrify -resize "${svgWidth}x${svgHeight}" -format jpg "${imagesDir}/cover.svg"
	cp "${imagesDir}/cover.jpg" "${destDir}/cover.jpg"
	cp "${imagesDir}/cover.svg" "${destDir}/cover-thumbnail.svg"
	mogrify -resize "${thumbWidth}x${thumbHeight}" -quality 100 -format jpg "${destDir}/cover-thumbnail.svg"
	rm "${imagesDir}/cover.svg"
	rm "${destDir}/cover-thumbnail.svg"

	#Convert each remaining svg to a png of the same name; the svg is deleted only if its conversion succeeded.
	#-z 2 renders at twice the size and -a keeps the aspect ratio.
	find "${workDir}" -iname "*.svg" -exec sh -c 'rsvg-convert -z 2 -a -f png -o "${0%.svg}.png" "$0"' "{}" \; -exec rm "{}" \;

	#Point every reference at the converted files, cover first so that it ends up as .jpg rather than .png.
	#The jpg and png files themselves are left out, because running sed over binary files could corrupt them.
	find "${workDir}" -type f \( ! -iname "*.jpg" ! -iname "*.png" \) -print0 | xargs -0 sed --in-place "s/cover\.svg/cover.jpg/g"
	#Replace the cover's mime type declaration in content.opf
	sed --in-place --regexp-extended "s/id=\"cover.jpg\" media\-type=\"image\/svg\+xml\"/id=\"cover.jpg\" media\-type=\"image\/jpeg\"/g" "${workDir}/${epubDir}/content.opf"
	#Replace all other references to .svg with .png
	find "${workDir}" -type f \( ! -iname "*.jpg" ! -iname "*.png" \) -print0 | xargs -0 sed --in-place "s/\.svg/.png/g"
	#Replace the remaining mime type declarations in content.opf
	sed --in-place --regexp-extended "s/image\/svg\+xml/image\/png/g" "${workDir}/${epubDir}/content.opf"
	#We have to remove these references to satisfy epubcheck.
	sed --in-place "s/properties=\"svg\"//g" "${workDir}/${epubDir}/content.opf"

	#At this point we can build the Kobo epub
	#It is made from a throwaway copy of the working directory, so the changes build-kobo makes don't end up in the other formats.
	if [ "${kobo}" = "true" ]; then
		koboWorkDir="${workDir}.kobo"

		cp --recursive "${workDir}" "${koboWorkDir}"
		cd "${koboWorkDir}"

		"${scriptDir}/build-kobo" "${koboWorkDir}"

		zip -9 --no-dir-entries -X --recurse-paths "${destDir}/${koboOutputFilename}" mimetype META-INF "${epubDir}" > /dev/null 2>&1

		#Discard the copy and carry on in the main working directory
		rm --recursive --force "${koboWorkDir}"
		cd "${workDir}"

		if [ "${verbose}" = "true" ]; then
			printf " OK\n\tBuilding %s ..." "${epub2OutputFilename}"
		fi
	fi

	#Include epub2 cover metadata
	#The id of the manifest item marked as cover-image is repeated in a <meta name="cover"> element appended to the line that opens <metadata>.
	coverId="$(xpath -e "string(//item[@properties=\"cover-image\"]/@id)" "${workDir}/${epubDir}/content.opf" 2> /dev/null)"
	sed --in-place --regexp-extended "s/(<metadata.*)/\1<meta content=\"${coverId}\" name=\"cover\" \/>/g" "${workDir}/${epubDir}/content.opf"

	#Add metadata to content.opf indicating this file is a Standard Ebooks compatibility build
	sed --in-place --regexp-extended "s/<dc:publisher/<meta property=\"se:transform\">compatibility<\/meta>\n\t\t<dc:publisher/g" "${workDir}/${epubDir}/content.opf"

	#Generate our NCX file for epub2 compatibility.
	#First find the ToC file.
	tocFilename="$(xpath -e "string(//item[@properties=\"nav\"]/@href)" "${workDir}/${epubDir}/content.opf" 2> /dev/null)"
	#Register the NCX file in the spine and the manifest, then generate it from the ToC.
	sed --in-place "s/<spine>/<spine toc=\"ncx\">/g" "${workDir}/${epubDir}/content.opf"
	sed --in-place "s/<manifest>/<manifest><item href=\"toc.ncx\" id=\"ncx\" media-type=\"application\/x-dtbncx+xml\" \/>/g" "${workDir}/${epubDir}/content.opf"
	xsltproc --stringparam cwd "${workDir}/" "${scriptDir}/data/navdoc2ncx.xsl" "${workDir}/${epubDir}/${tocFilename}" > "${workDir}/${epubDir}/toc.ncx"
	sed --in-place --regexp-extended "s/ xml\:lang=\"\?\?\"//g" "${workDir}/${epubDir}/toc.ncx"
	#Make nicely incrementing navpoint IDs and playOrders
	#Each perl run counts from 1 by itself: the first renumbers the ids, the second adds the playOrder attributes.
	sed --in-place "s/<navMap id=\".*\">/<navMap id=\"navmap\">/" "${workDir}/${epubDir}/toc.ncx"
	perl -pi -e 's/\<navPoint id\="idp[0-9]+"/"<navPoint id=\"navpoint-" . ++$n . "\""/ge' "${workDir}/${epubDir}/toc.ncx"
	perl -pi -e 's/\<navPoint/"<navPoint playOrder=\"" . ++$n . "\""/ge' "${workDir}/${epubDir}/toc.ncx"
	#Canonicalize and re-indent the file in place.  The result is written back to the file that was read, under ${epubDir}, rather than to a hard-coded "epub" directory.
	xmllint --c14n "${workDir}/${epubDir}/toc.ncx" | (printf "%s\n" "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" && cat) | xmllint --output "${workDir}/${epubDir}/toc.ncx" --format -

	#Convert the guide
	#The links of the ToC's landmarks nav are turned into the <reference> elements of an epub2 <guide>, which is appended to content.opf.
	#We add the 'text' attribute to the titlepage to tell the reader to start there
	#The pipeline, step by step:
	#	1. Extract the landmark links.
	#	2. Rename epub:type to type, dropping frontmatter/backmatter from its value.
	#	3. Where the value contains one of the listed types, reduce it to just that type.
	#	4. Rename copyright-page to "copyright page" and titlepage to "title-page text", and blank out values starting with appendix.
	#	5. Delete the lines that are left with an empty type.
	#	6. Turn each <a> into a <reference> whose title attribute is the link text.
	#	7. Wrap the result in <guide> and append it to content.opf.
	xpath -q -e "//nav[@epub:type=\"landmarks\"]/ol/li/a" "${workDir}/${epubDir}/$tocFilename" \
		| sed --regexp-extended "s/epub:type=\"([^\"]*)(\s*frontmatter\s*|\s*backmatter\s*)([^\"]*)\"/type=\"\1\3\"/g" \
		| sed --regexp-extended "s/epub:type=\"[^\"]*(acknowledgements|bibliography|colophon|copyright-page|cover|dedication|epigraph|foreword|glossary|index|loi|lot|notes|preface|bodymatter|titlepage|toc)[^\"]*\"/type=\"\1\"/g" \
		| sed "s/type=\"copyright\-page/type=\"copyright page/g" \
		| sed "s/type=\"titlepage/type=\"title-page text/g" \
		| sed "s/type=\"appendix/type=\"/g" \
		| sed "/type=\"\s*\"/d" \
		| sed "s/<a/<reference/g" \
		| sed --regexp-extended "s/>(.+)<\/a>/ title=\"\1\" \/>/g" \
		| (printf "%s\n" "<guide>" && cat) \
		| (cat && printf "%s\n" "</guide>") >> "${workDir}/${epubDir}/content.opf"
	#The guide was appended after the closing </package> tag, so remove that tag and put it back at the very end.
	sed --in-place "s/<\/package>//g" "${workDir}/${epubDir}/content.opf"
	printf "%s\n" "</package>" >> "${workDir}/${epubDir}/content.opf"
	#Canonicalize and re-indent content.opf in place.
	xmllint --c14n "${workDir}/${epubDir}/content.opf" | (printf "%s\n" "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" && cat) | xmllint --output "${workDir}/${epubDir}/content.opf" --format -

	#Add some compatibility CSS rules
	#Each page-break-before/after/inside declaration is followed by the matching -webkit-column-break-* declaration.
	find "${workDir}" -iname "*.css" -print0 | xargs -0 sed --in-place --regexp-extended "s/(page\-break\-(before|after|inside)\s*\:\s*(.+))/\1\n\t-webkit-column-break-\2: \3 \/* For Readium *\//g"
	#Each hyphens declaration is repeated under the vendor-specific property names.
	find "${workDir}" -iname "*.css" -print0 | xargs -0 sed --in-place --regexp-extended "s/^\s*hyphens\s*\:\s*(.+)/\thyphens: \1\n\tadobe-hyphenate: \1\n\t-webkit-hyphens: \1\n\t-epub-hyphens: \1\n\t-moz-hyphens: \1/g"
	#This has to run after the previous command, whose "hyphens: none;" output line it matches.
	find "${workDir}" -iname "*.css" -print0 | xargs -0 sed --in-place --regexp-extended "s/^\s*hyphens\s*\:\s*none;/\thyphens: none;\n\tadobe-text-layout: optimizeSpeed; \/* For Nook *\//g"

	#Add soft hyphens
	"${scriptDir}/hyphenate" --ignore-h-tags "${workDir}"

	#Hyphenate screws up our nice XHTML formatting so clean it up for distribution
	"${scriptDir}/clean" "${workDir}/"

	#Create the compatible epub file
	#The paths are relative, so this depends on the current directory still being the working directory.
	zip -9 --no-dir-entries -X --recurse-paths "${destDir}/${epub2OutputFilename}" mimetype META-INF "${epubDir}" > /dev/null 2>&1

	#Close the progress line of the compatible epub
	if [ "${verbose}" = "true" ]; then
		printf " OK\n"
	fi

	#Optionally validate the compatible epub.
	#If epubcheck fails, its output is printed and the build stops with status 1, before any Kindle file is created.
	if [ "${check}" = "true" ]; then
		if [ "${verbose}" = "true" ]; then
			printf "\tRunning epubcheck ..."
		fi

		#The path is quoted, so that an output directory containing whitespace still reaches epubcheck as a single argument.
		if ! output="$(epubcheck "${destDir}/${epub2OutputFilename}" 2>&1)"; then
			printf "%s\n" "${output}"
			exit 1
		fi

		if [ "${verbose}" = "true" ]; then
			printf " OK\n"
		fi
	fi

	if [ "${kindle}" = "true" ]; then
		if [ "${verbose}" = "true" ]; then
			printf "\tBuilding %s ..." "${kindleOutputFilename}"
		fi

		#The epub that gets handed to Calibre.  It is built from the working directory after the Kindle-specific changes below.
		epubSource="/tmp/${epub2OutputFilename}.tmp.epub"

		#Kindle doesn't go more than 2 levels deep for ToC, so flatten it here.  We copy and paste some of the code above...
		#later we should update it to a less hacky way of doing things.
		"${scriptDir}/toc2kindle" "${workDir}/${epubDir}/${tocFilename}"
		"${scriptDir}/clean" "${workDir}/${epubDir}/${tocFilename}"
		xsltproc --stringparam cwd "${workDir}/" "${scriptDir}/data/navdoc2ncx.xsl" "${workDir}/${epubDir}/${tocFilename}" > "${workDir}/${epubDir}/toc.ncx"
		sed --in-place --regexp-extended "s/ xml\:lang=\"\?\?\"//g" "${workDir}/${epubDir}/toc.ncx"

		#Make nicely incrementing navpoint IDs and playOrders
		#Each perl run counts from 1 by itself: the first renumbers the ids, the second adds the playOrder attributes.
		sed --in-place "s/<navMap id=\".*\">/<navMap id=\"navmap\">/" "${workDir}/${epubDir}/toc.ncx"
		perl -pi -e 's/\<navPoint id\="idp[0-9]+"/"<navPoint id=\"navpoint-" . ++$n . "\""/ge' "${workDir}/${epubDir}/toc.ncx"
		perl -pi -e 's/\<navPoint/"<navPoint playOrder=\"" . ++$n . "\""/ge' "${workDir}/${epubDir}/toc.ncx"
		#Canonicalize and re-indent the file in place.  The result is written back to the file that was read, under ${epubDir}, rather than to a hard-coded "epub" directory.
		xmllint --c14n "${workDir}/${epubDir}/toc.ncx" | (printf "%s\n" "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" && cat) | xmllint --output "${workDir}/${epubDir}/toc.ncx" --format -

		#Kindle doesn't recognize most zero-width spaces or word joiners, so just remove them.
		#It does recognize the word joiner character, but only in the old mobi7 format.  The new format renders them as spaces.
		#At this point the word joiners have already been replaced by zero-width no-break spaces, which is why only ${zwnbsp} is removed.
		find "${workDir}" -iname "*.xhtml" -exec sed --in-place --regexp-extended "s/${zwnbsp}//ig" "{}" \;

		#Append our kindle compatibility CSS file to the core CSS file.
		cat "${scriptDir}/templates/kindle.css" >> "${workDir}/${epubDir}/css/core.css"

		#Convert endnotes to Kindle popup compatible notes
		if [ -f "${workDir}/${epubDir}/text/endnotes.xhtml" ]; then
			"${scriptDir}/endnotes2kindle" "${workDir}/${epubDir}/text/endnotes.xhtml"

			#While Kindle now supports soft hyphens, popup endnotes break words but don't insert the hyphen characters.  So for now, remove soft hyphens from the endnotes file.
			sed --in-place "s/${shy}//g" "${workDir}/${epubDir}/text/endnotes.xhtml"
		fi

		#Remove the epub:type attribute, as Calibre turns it into just "type"
		find "${workDir}" -iname "*.xhtml" -print0 | xargs -0 sed --in-place --regexp-extended "s/epub:type=\"[^\"]*?\"//g"

		#Re-create the compatible epub file, this time with the Kindle-specific changes and under a temporary name.
		zip -9 --no-dir-entries -X --recurse-paths "${epubSource}" mimetype META-INF "${epubDir}" > /dev/null 2>&1

		#Generate the kindle file
		coverPath="$(xpath -e "string(//item[@properties=\"cover-image\"]/@href)" "${workDir}/${epubDir}/content.opf" 2> /dev/null)"
		#ebook-convert "${epubSource}" "${destDir}/${kindleOutputFilename}" --mobi-file-type="both" --pretty-print --no-inline-toc --max-toc-links=0 --prefer-metadata-cover --cover="${workDir}/${epubDir}/${coverPath}" > /dev/null 2>&1
		if ebook-convert "${epubSource}" "${destDir}/${kindleOutputFilename}" --pretty-print --no-inline-toc --max-toc-links=0 --prefer-metadata-cover --cover="${workDir}/${epubDir}/${coverPath}" > /dev/null 2>&1; then
			#Get the ASIN for the thumbnail
			#The ASIN is set to the SHA-1 sum of the book's identifying URL.
			bookId=$(grep --only-matching --extended-regexp "<dc:identifier id=\"uid\">url:[^<]+</dc:identifier>" "${workDir}/${epubDir}/content.opf" | sed --regexp-extended "s/<[^>]+>//g" | sed "s/^url://")
			asin=$(printf "%s" "${bookId}" | sha1sum | cut -d " " -f 1)

			#Update the ASIN in the generated file
			#update-asin writes a new file into the working directory, which then replaces the converted file.
			"${updateAsinPath}" "${asin}" "${destDir}/${kindleOutputFilename}" "${workDir}/${kindleOutputFilename}" > /dev/null
			mv "${workDir}/${kindleOutputFilename}" "${destDir}/${kindleOutputFilename}"

			#Extract the thumbnail
			#The ASIN is read back from the finished file, so the thumbnail's name matches what the file actually contains.
			asin="$(ebook-meta "${destDir}/${kindleOutputFilename}" | grep --only-matching --extended-regexp "mobi\-asin:.+" | cut -c11-)"
			convert "${workDir}/${epubDir}/${coverPath}" -resize 432x660 "${destDir}/thumbnail_${asin}_EBOK_portrait.jpg" > /dev/null 2>&1

			if [ "${verbose}" = "true" ]; then
				printf " OK\n"
			fi
		else
			#Don't fail silently: without this the only sign of a failed conversion is a missing file.
			#The other formats have already been built, so the build goes on with the next book.
			if [ "${verbose}" = "true" ]; then
				printf " FAILED\n"
			fi

			printf "Error: ebook-convert couldn't create %s\n" "${kindleOutputFilename}" 1>&2
		fi
	fi
done