-
Notifications
You must be signed in to change notification settings - Fork 0
/
notebook.py
185 lines (146 loc) · 5.23 KB
/
notebook.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
import statistics
from js import fileNames, fileTexts
def find_table(tables, row_name="", col_name=""):
""" Find the table that contains the row that we are looking for
"""
for table in tables:
if table.find_all(string=re.compile(row_name))or row_name == "":
if table.find_all(string=re.compile(col_name)) or col_name == "":
return table
return
def get_value(table, row_name='', col_name=''):
""" Parse the table and get the value of column <col_name> and row <row_name>
"""
header = table.findChildren(['th'])
rows = table.findChildren(['tr'])
col_idx = -1
for c, col in enumerate(header):
if col.string and re.search(col_name, col.string):
col_idx = c
break
if col_idx >= 0:
for row in rows:
cells = row.findChildren('td')
for cell in cells:
if cell.string and re.search(row_name, cell.string):
return cells[col_idx].string
return ""
def get_info(soup, col_name, row_name):
""" get desired entry based on col/row name """
tables = soup.findChildren('table')
if len(tables) > 0:
table = find_table(tables, row_name, col_name)
else:
print("Did not find ANY tables!")
return
if table:
value = get_value(table, row_name=row_name, col_name=col_name).strip()
return value
else:
print("Did not find a table with col_name \"%s\" and row_name \"%s\"!" % (col_name, row_name))
return
def make_soup(filename):
""" load file and create parseable data structure """
# with open(filename, 'r') as f:
# contents = f.read()
soup = BeautifulSoup(filename)
return soup
def run(filename, queries):
''' run queries on one file'''
soup = make_soup(filename)
result = []
for query in queries:
row_name = query[0]
col_name = query[1]
info = get_info(soup, col_name, row_name)
result.append([query[0], query[1], info])
return result
def find_files(folder, pattern="*htm*"):
''' fine awr files, based on pattern'''
def either(c):
return '[%s%s]' % (c.lower(), c.upper()) if c.isalpha() else c
return glob.glob(os.path.join(folder, ''.join(map(either, pattern))))
def create_key(query):
''' Create key based on the query. This key is e.g. used as column name in output'''
if query[0] == "":
r = query[1]
elif query[1] == "":
r = query[0]
else:
r = query[0] + "_" + query[1]
return r.replace("\\","")
def create_keys(queries):
''' Creates keys based on the query. This key is e.g. used as column name in output'''
keys = []
for query in queries:
keys.append(create_key(query))
return keys
import re
from bs4 import BeautifulSoup
import os
import pandas as pd
import glob
# folder = 'C:\\foobar\\foobar'
# folder = '/tmp'
#folder = 'C:\\Users\\masandma\\OneDrive - Microsoft\\Arbeit\\ORCAS\\ORACLE\\customers\\Bawagpsk\\awr'
# folder = 'C:\\Users\masandma\\Desktop\\OPTSK00561730\\OPTSK00561730'
# folder = 'C:\\Users\\masandma\\OneDrive - Microsoft\\Arbeit\\ORCAS\\ORACLE\\customers\\AH'
# folder = 'C:\\Users\\masandma\\Desktop\\coba'
# folder = "C:\\fakepath\\"
# queries are of the form [row_name, col_name],
# either element can be empty quotes
queries=[
['user calls', 'per Second'],
['user commits', 'per Second'],
['Begin Snap:', 'Snap Time'],
['End Snap:', 'Snap Time'],
["", "Host Name"],
["", "DB Name"],
["", "Edition"],
["", "RAC"],
["", "CDB"],
["", "Release"],
["", "Platform"],
["", "CPUs"],
['Host Mem \(MB\):', 'Begin'],
['SGA use \(MB\):', 'Begin'],
['PGA use \(MB\):', 'Begin'],
['Memory Usage %:','End'],
['user calls','per Trans'],
['user commits','per Trans'],
['Redo size \(bytes\)', 'Per Second'],
['DB CPU', '% DB time'],
['physical read total IO requests','per Second'],
['physical write total IO requests', 'per Second'],
['physical read total bytes','per Second'],
['physical write total bytes', 'per Second'],
['log file sync','Avg wait'],
["", "Table Scans"],
['compatible','Begin value'],
['optimizer_features_enable','Begin value'],
]
#Not needes anymore, Script does not check Folders anymore but get passed the uploaded Files content as a String directly
# filenames = [fileName]
# filenames = find_files(folder)
# filenames = fileNames
if len(fileNames) == 0:
print("didn't find any files!")
else:
keys = create_keys(queries)
res_dict = {}
res_dict["filename"] = []
for file_name in fileNames:
res_dict["filename"].append(file_name)
# res_dict['filename'] = fileNames
for key in keys:
res_dict[key] = []
for index, filename in enumerate(fileNames):
info = run(fileTexts[index], queries)
for entry in info:
key = create_key(entry)
res_dict[key].append(entry[2])
df = pd.DataFrame(res_dict)
df.to_csv("coba.csv")
#This last expression gets returned to the Javascript side, it was previously "df.head()".
#However, this type of Object cannot be returned to JS.
df.to_csv(index=False)