-
Notifications
You must be signed in to change notification settings - Fork 0
/
build.py
72 lines (58 loc) · 1.88 KB
/
build.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""
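Python port of https://github.com/wooorm/html-element-attributes/blob/main/build.js

Scrapes the attribute index of the WHATWG HTML specification and regenerates
HtmlElementAttributes/html_element_attributes.py.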
"""
from bs4 import BeautifulSoup
import requests
import re
import sys
import collections
sys.setrecursionlimit(10000)
from HtmlElementAttributes import html_element_attributes
# Attributes stored under "*" apply to every element.  Make sure that bucket
# exists in the previously generated table and keep a direct reference to it;
# the list is shared, so later appends to html_element_attributes["*"] are
# visible through global_attribs as well.
global_attribs = html_element_attributes.setdefault("*", [])
# Download the index page of the WHATWG HTML specification.  The timeout keeps
# the build from hanging forever on a stalled connection, and
# raise_for_status() aborts instead of parsing an HTTP error page as if it
# were the specification.
page = requests.get(
    "https://html.spec.whatwg.org/multipage/indices.html", timeout=60
)
page.raise_for_status()
soup = BeautifulSoup(page.content, "html5lib")

# Every body row of the "#attributes-1" table is read as: <th> holds the
# attribute name, the first <td> holds the ";"-separated elements it is
# allowed on.
for row in soup.select("#attributes-1 tbody tr"):
    name_cell = row.find("th")
    elements_cell = row.find("td")
    if name_cell is None or elements_cell is None:
        # Row without the expected cells: there is nothing to record.
        continue

    name = name_cell.get_text().strip()
    value = elements_cell.get_text().strip()
    if not name:
        continue

    # Attributes described in terms of custom elements are not tied to a
    # concrete tag name, so they are left out.
    if re.search(r"custom elements", value, re.I):
        continue

    elements = (
        # "HTML elements" marks a global attribute; file it under "*".
        ["*"]
        if re.search(r"HTML elements", value, re.I)
        # Otherwise split the list and drop parenthesised remarks.
        else [
            re.sub(r"\([^)]+\)", "", part).strip() for part in value.split(";")
        ]
    )

    for element in elements:
        element = element.lower()
        if not element:
            # An empty fragment (e.g. from a stray ";") is not an element
            # name and must not become a key of the table.
            continue
        known = html_element_attributes.setdefault(element, [])
        if name not in known:
            known.append(name)
# Sort the attribute names of every element.  Global attributes are listed
# only under "*": for any other element they are filtered out, and an element
# left with no attributes of its own is dropped from the result.
# A set gives constant-time membership tests and is built once, outside the
# loop.
global_names = frozenset(global_attribs)
final_list = {}
for element, attributes in html_element_attributes.items():
    if element == "*":
        own_attributes = sorted(attributes)
    else:
        own_attributes = sorted(
            attribute
            for attribute in attributes
            if attribute not in global_names
        )
    if own_attributes:
        final_list[element] = own_attributes

# Sort the element names.  Keys are unique, so sorting the (key, value) pairs
# never has to compare the value lists; dict keeps the insertion order.
html_element_attributes = dict(sorted(final_list.items()))
# Overwrite the generated data module: a module docstring followed by an
# assignment of the table, rendered through the dict's repr.
with open(
    "HtmlElementAttributes/html_element_attributes.py", "w+", encoding="utf8"
) as built:
    module_source = (
        '"""\n'
        "List of known HTML attributes names.\n"
        '"""\n'
        f"html_element_attributes = {html_element_attributes}"
    )
    built.write(module_source)