-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathbuildArchetypes.py
128 lines (96 loc) · 3.64 KB
/
buildArchetypes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from bs4 import BeautifulSoup
import requests
import json
import datetime
import codecs
import csv
import os
from pf2helpers import Pf2Helpers
# Envelope for the exported JSON document: a fixed title plus the date the
# module was loaded, formatted like "January 01, 2024".
feat_holder = {
    'name': 'Pathfinder 2.0 ArcheType Feat List',
    'date': datetime.date.today().strftime("%B %d, %Y"),
}
class BuildArchetypes:
    """Collect archetype pages from 2e.aonprd.com and export them as JSON.

    Links are gathered from a local ``archetypes.csv`` file and from the
    site's archetype index page. Each linked page is parsed into a dict and
    the collected dicts are written to ``json/archetypes-pf2.json``.
    """

    # Names of parent tags whose text nodes are left out of the description.
    blacklist = ['[document]','noscript','header','html','meta','head', 'input','script', 'h1','img','b','h3']
    # Helper shared by all instances; used here for load_html() and
    # split_children().
    pf = Pf2Helpers()

    def __init__(self):
        # Parsed archetypes accumulate here. This is an instance attribute so
        # that separate builders do not append to one shared class-level list.
        self.holder = []

    def load_links(self):
        """Read archetype links from ``archetypes.csv`` in the working directory.

        The file needs ``class`` and ``link`` columns; any other columns are
        ignored.

        Returns:
            A list of ``{'class': ..., 'link': ...}`` dicts, one per csv row.

        Raises:
            OSError: if the csv file cannot be opened.
            KeyError: if a row lacks the ``class`` or ``link`` column.
        """
        # newline='' lets the csv module do its own newline handling, as the
        # csv documentation recommends; utf-8-sig drops a leading BOM.
        with open("archetypes.csv", encoding='utf-8-sig', newline='') as csv_file:
            return [
                {'class': row['class'], 'link': row['link']}
                for row in csv.DictReader(csv_file)
            ]

    def load_other_archetypes(self):
        """Scrape archetype links from the site's archetype index page.

        Every ``<h2 class="title">`` on the index page contributes one entry
        built from one of the anchors inside it.

        Returns:
            A list of ``{'class': <anchor text>, 'link': <absolute url>}``
            dicts.
        """
        # NOTE(review): load_html() presumably returns a parsed BeautifulSoup
        # document -- it is only used through find_all() here; confirm
        # against Pf2Helpers.
        index_page = self.pf.load_html("https://2e.aonprd.com/Archetypes.aspx")
        found = []
        for heading in index_page.find_all("h2", {"class": "title"}):
            anchors = heading.find_all("a")
            if not anchors:
                # A heading without any link has nothing to follow; skip it
                # rather than fail with IndexError.
                continue
            # With several anchors the second one is used (presumably the
            # first wraps an icon -- TODO confirm); otherwise the only one.
            anchor = anchors[1] if len(anchors) > 1 else anchors[0]
            found.append({'class': anchor.text, 'link': "https://2e.aonprd.com/" + anchor['href']})
        return found

    def parse_data(self, html):
        """Parse one archetype HTML fragment into a dict.

        Args:
            html: markup to parse with BeautifulSoup's ``html5lib`` parser.

        Returns:
            A dict with the keys ``name`` (text of the first
            ``<h1 class="title">``), ``source`` (text of the first
            ``<a class="external-link">``), ``pfsFlag`` (``alt`` of the first
            ``<img>``, or ``"Excluded"`` when there is no image) and
            ``description`` (whitespace-normalized text of every text node
            whose parent tag is not in ``blacklist``).

        Raises:
            IndexError: if the fragment has no ``h1.title`` or no
                ``a.external-link`` element.
            KeyError: if the first ``<img>`` has no ``alt`` attribute.
        """
        soup = BeautifulSoup(html, 'html5lib')
        titles = soup.find_all("h1", {"class": "title"})
        source_links = soup.find_all("a", {"class": "external-link"})
        images = soup.find_all("img")

        arch_data = {
            'name': titles[0].text,
            'source': source_links[0].text,
            'pfsFlag': images[0]['alt'] if images else "Excluded",
        }

        # string=True is the current name of the deprecated text=True filter.
        visible_text = [
            node for node in soup.find_all(string=True)
            if node.parent.name not in self.blacklist
        ]
        # Join, then split/join again to collapse every whitespace run into a
        # single space.
        arch_data['description'] = " ".join(" ".join(visible_text).split())
        return arch_data

    def build_archetype(self, children):
        """Parse the archetype found in a list of page fragments.

        Args:
            children: iterable of markup strings (anything with
                ``startswith``).

        Returns:
            The dict produced by ``parse_data`` for the last fragment that
            starts with ``<h1``, or an empty dict when no fragment does.
        """
        # Start from an empty dict so a page without an <h1 fragment yields
        # an empty result instead of an UnboundLocalError.
        arch_data = {}
        for child in children:
            if child.startswith("<h1"):
                arch_data = self.parse_data(child)
        return arch_data

    def get_archetype_data(self, link):
        """Fetch one archetype page and return its parsed data.

        Args:
            link: URL of the archetype page.

        Returns:
            The dict built by ``build_archetype`` (empty when the page holds
            no ``<h1`` fragment).
        """
        # NOTE(review): split_children() presumably turns the node list into
        # markup strings, since build_archetype() calls startswith() on each
        # item -- confirm against Pf2Helpers.
        children = self.pf.load_html(link).contents
        return self.build_archetype(self.pf.split_children(children))

    def load_archetype_data(self, link_items):
        """Fetch and parse every linked page, collecting results in ``holder``.

        Args:
            link_items: iterable of dicts that each carry a ``link`` key.
        """
        for link_item in link_items:
            archetype = self.get_archetype_data(link_item['link'])
            # Pages that produced no data are left out of the export.
            if archetype:
                self.holder.append(archetype)

    def save_archetypes(self):
        """Write the collected archetypes to ``json/archetypes-pf2.json``.

        The list is stored under the ``archtypes`` key of the module-level
        ``feat_holder`` dict, which is then dumped as indented JSON.
        """
        # The key spelling is kept as-is: it is part of the output format.
        feat_holder['archtypes'] = self.holder
        json_data = json.dumps(feat_holder, indent=4)
        filename = "json/archetypes-pf2.json"
        # Create the output directory on first run instead of failing.
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        # The context manager guarantees the file is flushed and closed; the
        # previous code referenced f.close without calling it.
        with open(filename, "w") as f:
            f.write(json_data)
def main():
    """Gather archetype links, scrape each page and write the JSON export."""
    builder = BuildArchetypes()
    # Links listed in the csv file come first, then those scraped from the
    # site's index page.
    all_links = [*builder.load_links(), *builder.load_other_archetypes()]
    builder.load_archetype_data(all_links)
    builder.save_archetypes()


# Run the scrape only when executed as a script, not when imported.
if __name__ == "__main__":
    main()