-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpage_properties_report.py
299 lines (244 loc) · 11.1 KB
/
page_properties_report.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
from collections import defaultdict
from sphinx.util.docutils import nodes
from sphinx.util.docutils import SphinxDirective
from sphinx.application import Sphinx
import pandas as pd
"""_summary_
Key-Value pairs are added as field_names to documents, which can then be summarized
in a table by calling the page_properties_report directive.
"""
# Maps internal lookup keys to the docinfo field names expected in source documents.
fieldname_dict = {
    'fieldname_pagetype' : 'conf_pagetype',
    'fieldname_labels' : 'conf_labels',
    'fieldname_pageid' : 'conf_pageid',
    'fieldname_parent' : 'conf_parent'
} # dict with field names
# Maps raw field names to the human-readable column headers used in the report table.
fieldname_mapping = {
    'conf_labels': 'Labels',
    'conf_pageid': 'Confluence Page ID',
    'conf_parent': 'Confluence Parent Page ID',
    'conf_pagetype': "Confluence Page type",
    'doc_author': 'Author',
    'doc_owner': 'Owner',
    'doc_title': 'Title',
    'doc_last_changed': "Last changed",
    'doc_status': "Status",
    'doc_review': "Review",
    'doc_review_by': "Review by",
    'doc_review_date': "Review date",
    'doc_version': "Version",
    'doc_lang': "Language"
}
report_child_pagetype = 'reportchild' # value of pagetype for a report child page
# NOTE: the two lists below are module-level mutable state. They are (re)assigned
# by the PagePropertiesReport directive at parse time and read later by
# get_docinfo_from_env and create_table_node.
report_field_labels = [] # the labels we are looking for
report_columns = [] # the columns we want in our report
# my very simple and initial attempt at setting up a node to store data
class StorageNode(nodes.Element):
    """Bare docutils element intended as a data container.

    Appears unused by the current event flow in this file (only referenced
    in a commented-out note in event_12_env_check_consistency).
    """
    pass
class MyParagraphNode(nodes.General, nodes.Element):
    """Bare custom paragraph-like docutils node; not referenced elsewhere in this file."""
    pass
class LabelRequestPlaceholderNode(nodes.General, nodes.Element):
    """Placeholder emitted by the ``page_properties_report`` directive.

    Carries the labels the report was asked for; it is swapped out for the
    rendered report table at ``doctree-resolved`` time.
    """

    def __init__(self, requested_labels, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Kept on a private attribute and exposed read-only via the property.
        self._labels_requested = requested_labels

    @property
    def requested_labels(self):
        """Labels this placeholder was created with."""
        return self._labels_requested
class PagePropertiesReport(SphinxDirective):
    """``page_properties_report`` directive.

    Usage::

        .. page_properties_report:: label1,label2 col1,col2

    The first (required) argument is a comma-separated list of labels a
    document must carry to appear in the report; the optional second
    argument selects and orders the report columns.  The directive only
    records the request in module-level state and drops a placeholder
    node — the actual table is built at ``doctree-resolved`` time.
    """
    has_content = True
    required_arguments = 1
    optional_arguments = 1

    def run(self):
        # --- labels (first argument) -------------------------------------
        arg_labels = str(self.arguments[0])
        # Always split on commas: a single label yields a one-element list.
        # (The old no-comma branch stored the whole self.arguments list,
        # which wrongly included the columns argument as a "label".)
        globals()["report_field_labels"] = [
            label.strip() for label in arg_labels.split(',') if label.strip()
        ]

        # --- columns (optional second argument) --------------------------
        if len(self.arguments) > 1:
            arg_columns = str(self.arguments[1])
            # Always produce a list.  The old no-comma branch stored a bare
            # string, which broke the insert()/index() calls below and made
            # create_table_node treat each character as a column.
            columns = [col.strip() for col in arg_columns.split(',') if col.strip()]
            # making sure that the first column is 'doc_title'
            if 'doc_title' in columns:
                columns.remove('doc_title')
            columns.insert(0, 'doc_title')
            globals()["report_columns"] = columns

        # Drop a placeholder; it is replaced with the table once the whole
        # environment has been read (see replace_label_request_nodes_with_doc_refs).
        return [LabelRequestPlaceholderNode(report_field_labels)]

    def __repr__(self):
        # The old implementation referenced a non-existent ``self.val``
        # attribute and raised AttributeError whenever it was called.
        return f"{type(self).__name__}(arguments={getattr(self, 'arguments', None)!r})"
def get_docinfo_from_env(app, env):  # env-updated | event 10
    """Collect page-properties metadata from the build environment.

    Stores two attributes on ``env``:

    * ``env.field_data`` — docname -> full metadata dict, restricted to
      report-child pages that carry *every* requested label;
    * ``env.label_to_document_mapping`` — label -> set of docnames.
    """
    labels_key = fieldname_dict['fieldname_labels']
    pagetype_key = fieldname_dict['fieldname_pagetype']

    matching_docs = {}
    for docname, metadata in env.metadata.items():
        raw_labels = metadata.get(labels_key, [])
        if raw_labels:
            # Labels arrive as one comma-separated string; normalise to a list.
            labels = raw_labels.replace(' ', '').split(',')
        else:
            labels = raw_labels
        if not labels:
            continue
        if metadata.get(pagetype_key, '') != report_child_pagetype:
            continue
        # Keep only documents carrying every label the report asked for.
        if all(wanted in labels for wanted in report_field_labels):
            matching_docs[docname] = metadata
    env.field_data = matching_docs

    # Invert the relation: label -> {docnames}.  Example: {'l1': {'doc3'}, 'l2': {'doc1', 'doc3'}}
    label_to_document_mapping = defaultdict(set)
    for docname, metadata in matching_docs.items():
        for label in metadata.get(labels_key, '').split(','):
            label_to_document_mapping[label.strip()].add(docname)
    env.label_to_document_mapping = label_to_document_mapping
def create_table_node(dataset):
    """Build a docutils table node summarising *dataset*.

    ``dataset`` is a mapping of docname -> metadata dict (as collected by
    ``get_docinfo_from_env``).  Column selection and order come from the
    module-level ``report_columns`` list; when it is empty, every column is
    shown.  The first cell of each row is rendered as a link to
    ``<docname>.html``.
    """
    # Create a DataFrame and transpose so rows are documents, columns are fields.
    df = pd.DataFrame(dataset).T

    # Restrict/reorder columns only when the user actually selected some.
    # (The old test used ``or`` — always true since the global is always
    # defined — so an empty selection produced an empty table instead of
    # showing all columns.)
    selected = globals().get('report_columns') or []
    if isinstance(selected, str):
        # Tolerate a bare string left over from a single-column argument.
        selected = [selected]
    if selected:
        df = df.loc[:, selected]

    # Replace raw field names with human-readable headers.
    df.rename(columns=fieldname_mapping, inplace=True)

    columns = df.columns.tolist()
    data = df.values.tolist()

    # Column width = widest cell value, but at least as wide as the header.
    if data:
        column_widths = [
            max(max(len(str(value)) for value in col_values), len(str(header)))
            for header, col_values in zip(columns, zip(*data))
        ]
    else:
        # No matching documents: fall back to header widths (old code crashed here).
        column_widths = [len(str(header)) for header in columns]

    # Number of rendered columns.  (Old code used len(report_columns), which
    # was wrong when report_columns was empty or a bare string.)
    columns_number = len(columns)

    table_node = nodes.table()
    table_group_node = nodes.tgroup(cols=columns_number)
    table_node += table_group_node

    # One colspec per column (the old loop also created a discarded duplicate).
    for c_width in column_widths:
        table_group_node += nodes.colspec(colwidth=c_width)

    # Title head: a single entry spanning the whole table.
    # ``morecols`` counts *additional* columns spanned, hence ncols - 1.
    table_head_node_title = nodes.thead()
    title_row = nodes.row()
    title_row += nodes.entry(
        '',
        nodes.paragraph(text=f"Page Properties Report ({', '.join(report_field_labels)})"),
        morecols=max(columns_number - 1, 0),
    )
    table_head_node_title += title_row
    table_group_node += table_head_node_title

    # Header row with the column names.
    table_head_node = nodes.thead()
    header_row = nodes.row()
    for c_header in columns:
        header_row += nodes.entry('', nodes.paragraph(text=c_header))
    table_head_node += header_row
    table_group_node += table_head_node

    table_body_node = nodes.tbody()
    table_group_node += table_body_node

    # Data rows: first cell links to the document, the rest are plain text.
    for index, row in df.iterrows():
        row_node = nodes.row()
        table_body_node += row_node

        link_entry = nodes.entry()
        link_paragraph = nodes.paragraph()
        link_paragraph += nodes.reference(refuri=index + ".html", text=index)
        link_entry += link_paragraph
        row_node += link_entry

        # Add the rest of the columns.
        for item in row[1:]:
            cell = nodes.entry()
            cell += nodes.paragraph(text=str(item))
            row_node += cell

    # uncomment to display the table content during output
    # print(table_node.pformat())
    return table_node
def replace_label_request_nodes_with_doc_refs(app, doctree, docname):  # doctree-resolved | event 14
    """Swap every LabelRequestPlaceholderNode in *doctree* for the report table."""
    filtered_field_data = app.env.field_data
    label_to_document_mapping = app.env.label_to_document_mapping  # fetched but not used below
    for placeholder in doctree.findall(LabelRequestPlaceholderNode):
        requested_labels = placeholder.requested_labels  # kept for parity/debugging
        table_node = create_table_node(filtered_field_data)
        placeholder.replace_self(table_node)
def event_04_env_before_read_docs(app, env, docnames):
    # Stub handler for the ``env-before-read-docs`` event (event 04).
    # Never connected in setup(); kept as a reference/debug hook.
    # docname: {docnames} + Event env_before_read_docs
    pass
def event_06_source_read(app, docname, source):
    # Stub handler for the ``source-read`` event (event 06).
    # Never connected in setup(); kept as a reference/debug hook.
    # docname: {docname} + Event source_read
    pass
def event_08_doctree_read(app, doctree): # this reads the doctree for every file being processed by #06
    # Stub handler for the ``doctree-read`` event (event 08).
    # Never connected in setup(); kept as a reference/debug hook.
    # doctree: {doctree} + Event doctree_read
    pass
def event_10_env_updated(app, env):
    # Stub handler for the ``env-updated`` event (event 10).
    # Never connected in setup() — get_docinfo_from_env is used for this event instead.
    # env.found_docs: {env.found_docs} + Event env_updated
    pass
def event_12_env_check_consistency(app, env):
    # Stub handler for the ``env-check-consistency`` event (event 12).
    # Never connected in setup(); kept as a reference/debug hook.
    # env.found_docs: {env.found_docs} + Event env_check_consistency
    # the field data obtained from env.storage_node: {env.storage_node.field_data}
    pass
def event_14_doctree_resolved(app, doctree, docname):
    # Stub handler for the ``doctree-resolved`` event (event 14).
    # Never connected in setup() — replace_label_request_nodes_with_doc_refs is used instead.
    # docname: {docname} + Event source_read
    pass
def setup(app: Sphinx):
    """Sphinx extension entry point: register the directive and event handlers."""
    #
    # FYI: A reference of the sequence of core events
    #
    # event 04, app.connect("env-before-read-docs", event_04_env_before_read_docs)
    # event 06, app.connect("source-read", event_06_source_read)
    # event 08, app.connect("doctree-read", event_08_doctree_read)
    # event 10, app.connect("env-updated", event_10_env_updated)
    # event 12, app.connect("env-check-consistency", event_12_env_check_consistency)
    # event 14, app.connect("doctree-resolved", event_14_doctree_resolved)
    # the directive
    app.add_directive('page_properties_report', PagePropertiesReport)
    # FYI: when to parse the documents
    # "doctree-read" will process 1 doctree at a time
    # "env-updated" will get me the whole env with
    app.connect('doctree-resolved', replace_label_request_nodes_with_doc_refs) # all doctrees in it
    # i've decided on env-updated to collect the info from the env
    app.connect('env-updated', get_docinfo_from_env)
    # Disable caching
    # NOTE(review): 'env_cache', 'doctree_cache' and 'env_purge' do not look like
    # standard Sphinx config values, and item assignment on app.config may be a
    # no-op or raise depending on the Sphinx version — verify against the target
    # Sphinx release.
    app.config['env_cache'] = False
    app.config['doctree_cache'] = False
    app.config['env_purge'] = True
    # parallel_*_safe are False because this extension stores state in module
    # globals and on the env, which is not safe across parallel workers.
    return {
        'version': '1.0',
        'parallel_read_safe': False,
        'parallel_write_safe': False,
    }