forked from standardebooks/tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
find-unused-selectors
executable file
·89 lines (70 loc) · 3.17 KB
/
find-unused-selectors
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env python3
import argparse
import glob
import sys
import os
import regex
import lxml.cssselect
import lxml.etree as etree
# Namespace prefixes handed to both the CSS selector compiler and the XPath queries
XHTML_NAMESPACES = {
    "xhtml": "http://www.w3.org/1999/xhtml",
    "epub": "http://www.idpf.org/2007/ops",
    "z3998": "http://www.daisy.org/z3998/2012/vocab/structure/",
    "se": "http://standardebooks.org/vocab/1.0",
}
def _extract_selectors(css):
    """Return the set of individual selectors declared in the stylesheet text *css*.

    This is a simple regex-based reduction, not a real CSS parser: comments,
    declaration blocks and at-rule lines are stripped, and whatever remains is
    split into one selector per comma-separated entry. Commas nested inside a
    selector (for example in a functional pseudo-class) are not understood.
    """
    # Comments go first so braces or commas inside them are never mistaken for rule syntax
    css = regex.sub(r"/\*.*?\*/", "", css, flags=regex.DOTALL)

    # Remove the declaration blocks (empty ones included) so only the selectors remain
    css = regex.sub(r"{[^}]*}", "", css)

    # Remove @ defines
    css = regex.sub(r"^@.+", "", css, flags=regex.MULTILINE)

    # Split grouped selectors on their commas; merely deleting the commas would
    # turn "h1, h2" into the descendant selector "h1  h2"
    candidates = (entry.strip() for line in css.splitlines() for entry in line.split(","))

    return {candidate for candidate in candidates if candidate}


def _parse_text_files(directory):
    """Parse the XHTML text files under *directory* and return their lxml trees.

    The title page, imprint and uncopyright files are skipped.
    """
    skipped_suffixes = ("titlepage.xhtml", "imprint.xhtml", "uncopyright.xhtml")
    # Escape the directory so glob metacharacters in its name are taken literally
    pattern = os.path.join(glob.escape(directory), "src/epub/text", "*.xhtml")

    trees = []
    for filename in glob.glob(pattern):
        if filename.endswith(skipped_suffixes):
            continue

        # We have to remove the default namespace declaration from our document, otherwise
        # xpath won't find anything at all. See http://stackoverflow.com/questions/297239/why-doesnt-xpath-work-when-processing-an-xhtml-document-with-lxml-in-python
        with open(filename, "r", encoding="utf-8") as file:
            xhtml = file.read().replace(" xmlns=\"http://www.w3.org/1999/xhtml\"", "")

        trees.append(etree.fromstring(xhtml.encode("utf-8")))

    return trees


def _find_unused_selectors(selectors, trees):
    """Return, sorted, the entries of *selectors* that match nothing in any of *trees*.

    Selectors that lxml cannot translate are never reported as unused, because
    they cannot be checked.
    """
    unused = []

    for selector in sorted(selectors):
        try:
            sel = lxml.cssselect.CSSSelector(selector, translator="html", namespaces=XHTML_NAMESPACES)
        except lxml.cssselect.ExpressionError:
            # This gets thrown if we use pseudo-elements, which lxml doesn't support
            continue
        except lxml.cssselect.SelectorSyntaxError:
            # Leftovers of CSS that the regex reduction couldn't handle; say so instead of crashing
            print("Warning: Couldn't parse selector: {}".format(selector), file=sys.stderr)
            continue

        if not any(tree.xpath(sel.path, namespaces=XHTML_NAMESPACES) for tree in trees):
            unused.append(selector)

    return unused


def main():
    """Report the local.css selectors that match nothing in each given source directory.

    Directories are read from the command line. For each one, the selectors of
    src/epub/css/local.css are tested against the files in src/epub/text/ and the
    unused ones are printed, tab-indented, below the directory name.

    Exits with status 1 as soon as an argument is not a directory or its CSS
    file cannot be read.
    """
    parser = argparse.ArgumentParser(description="Find unused local.css CSS selectors in Standard Ebook source directories.")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    for directory in args.directories:
        directory = os.path.abspath(directory)

        if not os.path.isdir(directory):
            print("Error: Not a directory: {}".format(directory), file=sys.stderr)
            sys.exit(1)

        if args.verbose:
            print("Processing {} ...".format(directory), end="", flush=True)

        css_filename = os.path.join(directory, "src/epub/css/local.css")
        try:
            with open(css_filename, encoding="utf-8") as file:
                css = file.read()
        except (OSError, UnicodeError):
            print("Error: Couldn't open CSS file: {}".format(css_filename), file=sys.stderr)
            sys.exit(1)

        # Each text file is parsed once here rather than once per selector
        unused_selectors = _find_unused_selectors(_extract_selectors(css), _parse_text_files(directory))

        # Did we find any unused selectors?
        if unused_selectors:
            if args.verbose:
                # Finish the "Processing ..." line that was left open above
                print("")
            else:
                print(directory)

            for selector in unused_selectors:
                print("\t" + selector)
        elif args.verbose:
            print(" OK")
# Only run the tool when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    main()