-
Notifications
You must be signed in to change notification settings - Fork 0
/
md2xlsform.py
executable file
·215 lines (163 loc) · 5.59 KB
/
md2xlsform.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
#!/usr/bin/env python3
import argparse
import json
import os
import pandas as pd
"""
%% survey
| type | name | label |
| --- | --- | --- |
| text | name | What is your name? |
| integer | age | How old are you? |
| select_one foods | food | What is your favourite meal? |
%% choices
| list_name | name | label |
| --- | --- | --- |
| foods | pasta | Pasta |
| foods | pizza | Pizza |
| foods | sushi | Sushi |
%% settings
| form_title |
| --- |
| Basics |
"""
def slice_into_sheets(file_content, sep='%%'):
    """Split markdown form text into ``{sheet_name: table_lines}``.

    The text is cut at every occurrence of *sep*. Within each chunk the
    first non-blank line is the sheet name (stripped of surrounding
    whitespace) and the remaining non-blank lines are returned unchanged
    as that sheet's table.

    Args:
        file_content: Full text of the markdown file.
        sep: Marker that introduces each sheet.

    Returns:
        dict mapping sheet name to its list of table lines. If a name occurs
        more than once, the last chunk wins.
    """
    sheets = {}
    for chunk in file_content.split(sep):
        # Whitespace-only lines carry no table data, so drop them together
        # with the truly empty ones.
        lines = [line for line in chunk.split('\n') if line.strip()]
        if not lines:
            # Nothing but blanks (e.g. a newline before the first separator):
            # there is no sheet name to read here, so skip the chunk instead
            # of failing on ``lines[0]``.
            continue
        name, *table = lines
        sheets[name.strip()] = table
    return sheets
def get_dict_of_df_sheets(sheets):
    """Turn ``{sheet_name: table_lines}`` into ``{sheet_name: DataFrame}``.

    Each table is expected to start with a header line followed by a
    separator line (which is ignored); every further non-empty line is one
    data row. Cells are paired with header names by position, and surplus
    cells on a row are dropped.

    Args:
        sheets: Mapping of sheet name to its list of markdown table lines.

    Returns:
        dict mapping sheet name to a DataFrame with one column per header
        cell, including for tables that have no data rows.

    Raises:
        ValueError: If a table has fewer than two lines, i.e. it lacks the
            header or the separator row.
    """
    updated_sheets = {}
    for sheet_name, table in sheets.items():
        if len(table) < 2:
            # Same exception type the bare unpacking below would raise, but
            # with a message that names the offending sheet.
            raise ValueError(
                f'Sheet {sheet_name!r} needs a header row and a separator row'
            )
        columns, _, *content = table
        list_cols = get_list_of_cells(columns)
        list_content = get_list_of_cells(strip_empty(content))
        all_content = [dict(zip(list_cols, line)) for line in list_content]
        # Passing the header explicitly keeps the column names (and their
        # order) even when there are no rows; dict.fromkeys de-duplicates
        # repeated header names the same way the row dicts do.
        updated_sheets[sheet_name] = pd.DataFrame(
            all_content, columns=list(dict.fromkeys(list_cols))
        )
    return updated_sheets
def strip_empty(content):
    """Return the truthy items of *content* as a list, in original order.

    For a list of strings this removes the empty strings; strings that
    contain only whitespace are kept.
    """
    return list(filter(None, content))
def get_cells(line):
    """Split one markdown table row into its stripped cell values.

    Whitespace around the row and one optional leading and trailing pipe
    are removed first, so only the pipes *between* cells act as
    delimiters. Empty interior cells are kept as ``''`` so that later
    cells stay aligned with their columns.

    Args:
        line: A single table line such as ``'| a | b |'``.

    Returns:
        List of cell strings; an empty list when the line holds no cells.
    """
    row = line.strip()
    # Drop the outer border pipes explicitly rather than filtering out every
    # empty fragment, which would also swallow genuinely empty cells.
    if row.startswith('|'):
        row = row[1:]
    if row.endswith('|'):
        row = row[:-1]
    if not row:
        return []
    return [cell.strip() for cell in row.split('|')]
def get_list_of_cells(content):
    """Parse one table line, or several, into cells via ``get_cells``.

    A single string yields a flat list of cells; any other iterable is
    treated as a sequence of lines and yields one list of cells per line.
    """
    if not isinstance(content, str):
        return list(map(get_cells, content))
    return get_cells(content)
def get_sheet_from_json(content, sheet):
    """Flatten the rows of one JSON sheet into plain XLSForm-style dicts.

    For every row in ``content[sheet]``:

    * keys starting with ``$`` and the ``select_from_list_name`` key are
      not copied;
    * keys listed in ``content['translated']`` hold one value per entry of
      ``content['translations']``. When named languages are present the
      values are spread over ``<key>::<language>`` columns (a ``None``
      language keeps the bare key); otherwise only the first value is kept
      under the bare key;
    * a ``select_one`` / ``select_multiple`` type gets its list name
      appended, e.g. ``'select_one foods'``.

    Args:
        content: Parsed JSON document.
        sheet: Name of the sheet to extract, e.g. ``'survey'``.

    Returns:
        List of dicts, one per row.

    Raises:
        KeyError: If a select row has no ``select_from_list_name``.
    """
    translated = content.get('translated', ())
    translations = content.get('translations', [None])
    # A lone ``None`` language means there is only an unnamed default
    # language, so no ``::language`` suffix is wanted.
    unnamed_only = len(translations) == 1 and translations[0] is None

    rows = []
    for item in content[sheet]:
        new_item = {}
        for key, value in item.items():
            if key.startswith('$') or key == 'select_from_list_name':
                continue
            if key not in translated:
                new_item[key] = value
            elif not isinstance(value, (list, tuple)):
                # Not a per-language list after all; keep the value as is.
                new_item[key] = value
            elif not unnamed_only and len(value) == len(translations):
                # Expand whenever this value has one entry per language. The
                # number of translated *columns* is unrelated to the number
                # of languages and must not decide this.
                for text, language in zip(value, translations):
                    column = key if language is None else f'{key}::{language}'
                    new_item[column] = text
            else:
                new_item[key] = value[0] if value else None
        if item.get('type') in ('select_one', 'select_multiple'):
            new_item['type'] = f"{item['type']} {item['select_from_list_name']}"
        rows.append(new_item)
    return rows
def order_sheet(sheet_dict, sheet_name):
    """Build a DataFrame for a sheet with its key column placed first.

    The ``survey`` sheet leads with ``type`` and the ``choices`` sheet with
    ``list_name``; all other columns keep their relative order. Any other
    sheet, or a sheet that lacks its key column, is returned in its
    natural column order.

    Args:
        sheet_dict: Row data accepted by ``pandas.DataFrame`` (here a list
            of dicts).
        sheet_name: Name of the sheet the rows belong to.

    Returns:
        The resulting DataFrame.
    """
    df = pd.DataFrame(sheet_dict)
    lead = {'survey': 'type', 'choices': 'list_name'}.get(sheet_name)
    if lead is None or lead not in df.columns:
        # Nothing to move; a missing key column is left for downstream tools
        # to report rather than failing here.
        return df
    cols = [lead] + [col for col in df.columns if col != lead]
    return df[cols]
def from_md(in_file, sep='%%'):
    """Read a markdown form file into ``{sheet_name: DataFrame}``.

    The file is read in text mode with the platform's default encoding,
    split into sheets at each *sep*, and every sheet's table is parsed into
    a DataFrame.
    """
    with open(in_file, 'r') as handle:
        markdown = handle.read()
    return get_dict_of_df_sheets(slice_into_sheets(markdown, sep=sep))
def from_excel(in_file):
    """Read the workbook at *in_file* with pandas.

    ``sheet_name=None`` requests every sheet, so the result is keyed by
    sheet name; ``index_col=None`` keeps all columns as data.
    """
    workbook = pd.read_excel(in_file, sheet_name=None, index_col=None)
    return workbook
def from_json(in_file):
    """Load a JSON form definition into ``{sheet_name: DataFrame}``.

    Only the ``survey``, ``choices`` and ``settings`` keys are read, in that
    order; a sheet that is missing or empty in the JSON is left out of the
    result.

    Args:
        in_file: Path of the JSON file.

    Returns:
        dict mapping sheet name to its DataFrame.
    """
    # JSON interchange is UTF-8 (RFC 8259); do not depend on the locale's
    # default encoding.
    with open(in_file, 'r', encoding='utf-8') as f:
        content = json.load(f)

    project = {}
    for sheet in ('survey', 'choices', 'settings'):
        if not content.get(sheet):
            continue
        if sheet == 'settings':
            # Settings is a single mapping; wrap it so it forms one row.
            rows = [content[sheet]]
        else:
            rows = get_sheet_from_json(content, sheet)
        project[sheet] = order_sheet(rows, sheet)
    return project
def to_excel(project, out_file):
    """Write each DataFrame in *project* to its own sheet of *out_file*.

    Sheets are named after the keys of *project*, and the DataFrame index
    is not written.
    """
    with pd.ExcelWriter(out_file) as workbook:
        for title, frame in project.items():
            frame.to_excel(workbook, sheet_name=title, index=False)
def to_md(project, out_file, sep='%%'):
    """Write *project* to *out_file* as markdown tables.

    Each sheet becomes a ``<sep> <sheet name>`` heading followed by its
    table in ``github`` format, with missing values rendered as empty
    cells. Sections are separated by a blank line, and the file is written
    in text mode with the platform's default encoding.
    """
    sections = []
    for name, frame in project.items():
        table = frame.fillna('').to_markdown(index=False, tablefmt='github')
        sections.extend([f'{sep} {name}', table])
    with open(out_file, 'w') as handle:
        handle.write('\n\n'.join(sections))
def _get_extension(filename):
return filename.split('.')[-1]
def main():
    """Command-line entry point: convert a form between md, xlsx and json.

    The input format is chosen from the input file's extension (``.md``,
    ``.xls`` / ``.xlsx`` or ``.json``). The output is written as markdown
    when the output name ends in ``.md`` and as an Excel workbook otherwise;
    an output name with any other extension gets ``.xlsx`` appended. Both
    paths are resolved against the current working directory.

    Exits through ``argparse`` (status 2) when a required option is missing
    or the input file type is not supported.
    """
    parser = argparse.ArgumentParser(description='Convert Markdown to XLSForm')
    parser.add_argument(
        '--input',
        '-i',
        type=str,
        required=True,  # without it os.path.join() below would get None
        help='Input file, either md, xlsx or json',
    )
    parser.add_argument(
        '--output',
        '-o',
        type=str,
        required=True,
        help='Output XLSForm, either xlsx or md',
    )
    parser.add_argument(
        '--input-separator',
        '-is',
        type=str,
        default='%%',
        help='Markdown sheet separator character',
    )
    parser.add_argument(
        '--output-separator',
        '-os',
        type=str,
        default='%%',
        help='Markdown sheet separator character',
    )
    args = parser.parse_args()

    cwd = os.getcwd()
    in_file = os.path.join(cwd, args.input)
    out_file = os.path.join(cwd, args.output)

    # One exclusive chain, so an unknown extension is reported instead of
    # leaving ``project`` unbound.
    if in_file.endswith('.md'):
        project = from_md(in_file, sep=args.input_separator)
    elif in_file.endswith(('.xls', '.xlsx')):
        project = from_excel(in_file)
    elif in_file.endswith('.json'):
        project = from_json(in_file)
    else:
        parser.error(f'unsupported input file type: {args.input}')

    # Default to an Excel workbook when the output name has no known extension.
    if _get_extension(out_file) not in ('xlsx', 'md'):
        out_file = f'{out_file}.xlsx'

    if out_file.endswith('.xlsx'):
        to_excel(project, out_file)
    elif out_file.endswith('.md'):
        to_md(project, out_file, sep=args.output_separator)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()