forked from standardebooks/tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreorder-endnotes
executable file
·69 lines (54 loc) · 2.6 KB
/
reorder-endnotes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python3
import argparse
import os
import sys
import fnmatch
import regex
from bs4 import BeautifulSoup
def is_positive_integer(value):
    """
    argparse `type=` callable that accepts only integers greater than zero.

    value: the raw command-line string (anything `int()` accepts).

    Returns the parsed integer.

    Raises argparse.ArgumentTypeError if `value` is not an integer or is
    less than or equal to zero, so that argparse prints the same
    descriptive message in both cases.
    """
    try:
        int_value = int(value)
    except (TypeError, ValueError):
        # Without this, argparse falls back to its generic
        # "invalid is_positive_integer value" message.
        raise argparse.ArgumentTypeError("{} is not a positive integer".format(value)) from None

    if int_value <= 0:
        raise argparse.ArgumentTypeError("{} is not a positive integer".format(value))

    return int_value
def main():
    """
    Increment the specified endnote number and all following ones by 1.

    Reads ENDNOTE-NUMBER and DIRECTORY from the command line. The file
    DIRECTORY/src/epub/text/endnotes.xhtml is renumbered first; then every
    other *.xhtml file found under DIRECTORY/src/ is renumbered to match.
    Files are modified in place.

    Exits with status 1 if the endnotes file cannot be read or written.
    """
    parser = argparse.ArgumentParser(description="Increment the specified endnote and all following endnotes by 1.")
    parser.add_argument("target_endnote_number", metavar="ENDNOTE-NUMBER", type=is_positive_integer, help="the endnote number to start incrementing at")
    parser.add_argument("directory", metavar="DIRECTORY", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    directory = os.path.abspath(args.directory)
    # Already converted to int by the argparse `type=` callable.
    target_endnote_number = args.target_endnote_number
    endnote_count = 0
    source_directory = os.path.join(directory, "src/")
    # Computed outside the `try` so the error handler can always name the file.
    endnotes_filename = os.path.join(source_directory, "epub/text/endnotes.xhtml")

    # First get the highest numbered endnote, then renumber the endnotes file.
    try:
        with open(endnotes_filename, "r+", encoding="utf-8") as file:
            xhtml = file.read()
            soup = BeautifulSoup(xhtml, "lxml")
            endnote_count = len(soup.select("li[data-se-note-number]"))

            # Walk from the highest number down to the target so that renaming
            # N to N+1 never collides with an N+1 that has not been moved yet.
            for endnote_number in range(endnote_count, target_endnote_number - 1, -1):
                xhtml = xhtml.replace("id=\"note-{}\"".format(endnote_number), "id=\"note-{}\"".format(endnote_number + 1))
                xhtml = xhtml.replace("data-se-note-number=\"{}\"".format(endnote_number), "data-se-note-number=\"{}\"".format(endnote_number + 1))
                xhtml = xhtml.replace("#note-{}\"".format(endnote_number), "#note-{}\"".format(endnote_number + 1))

            file.seek(0)
            file.write(xhtml)
            file.truncate()
    except (OSError, UnicodeError) as ex:
        # Only I/O and decoding failures are reported here; any other
        # exception is a programming error and should surface with a traceback.
        print("Error: Couldn't open endnotes file: {} ({})".format(endnotes_filename, ex), file=sys.stderr)
        sys.exit(1)

    for root, _, filenames in os.walk(source_directory):
        for filename in fnmatch.filter(filenames, "*.xhtml"):
            # Skip endnotes.xhtml since we already processed it
            if filename == "endnotes.xhtml":
                continue

            with open(os.path.join(root, filename), "r+", encoding="utf-8") as file:
                original_xhtml = file.read()
                xhtml = original_xhtml

                for endnote_number in range(endnote_count, target_endnote_number - 1, -1):
                    # NOTE(review): this matches any <a> element whose entire text
                    # is the number, not only endnote references -- confirm that
                    # is acceptable for the files being processed.
                    # The replacement is a raw string: "\g" is not a valid Python
                    # string escape, and \g<1> (rather than \1) keeps the group
                    # reference separate from the digits that follow it.
                    xhtml = regex.sub(r"(<a[^>]*?>){}</a>".format(endnote_number), r"\g<1>{}</a>".format(endnote_number + 1), xhtml, flags=regex.MULTILINE | regex.DOTALL)
                    # NOTE(review): only id="note-N" and #note-N" are rewritten
                    # here; ids/hrefs of any other form are left untouched.
                    xhtml = xhtml.replace("id=\"note-{}\"".format(endnote_number), "id=\"note-{}\"".format(endnote_number + 1))
                    xhtml = xhtml.replace("#note-{}\"".format(endnote_number), "#note-{}\"".format(endnote_number + 1))

                # Leave files without matching endnote markup untouched.
                if xhtml != original_xhtml:
                    file.seek(0)
                    file.write(xhtml)
                    file.truncate()
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()