-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvert_to_html_table.py
executable file
·44 lines (38 loc) · 1.78 KB
/
convert_to_html_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import argparse
import io
import os
import re
import sys

import pandas as pd
from lxml import html
def build_parser():
    """Build the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: parser exposing ``-i/--input``, ``-t/--type``,
        ``-k/--key-word`` and ``-o/--output``.
    """
    parser = argparse.ArgumentParser(
        description="Convert a table (tab separated txt or html) into an html table "
                    "and insert it into a target html file in place of a key-word")
    parser.add_argument('-i', '--input', dest='input', metavar='<path>', required=True,
                        help='Path of input Table, either txt or html')
    parser.add_argument('-t', '--type', dest='type', metavar='<str>', default="auto",
                        help='The type of input file. txt(must be tab seprated) or html. default is auto, which decide the file type by postfix of file name')
    # No ``default`` on the two required options below: argparse never uses a
    # default for a required argument, so declaring one was only misleading.
    parser.add_argument('-k', '--key-word', dest='key', metavar='<str>', required=True,
                        help='The key-word supposed to be replaced by this table in target html')
    parser.add_argument('-o', '--output', dest='out', metavar='<directory>', required=True,
                        help='The path of target html to which this table will input')
    return parser


def resolve_file_type(path, declared="auto"):
    """Return the lower-cased input type used to pick a loader.

    Args:
        path: Path of the input file.
        declared: Value of ``--type``; ``"auto"`` means "derive it from the
            text after the last dot of *path*".

    Returns:
        str: *declared* when it is not ``"auto"``, otherwise the text after the
        last ``.`` in *path*. A path without a dot yields the whole path and a
        path ending in a dot yields ``""``; both select the fallback loader.
    """
    if declared != "auto":
        return declared.lower()
    # rpartition never fails, unlike a regex search that returns None when the
    # path ends with a dot.
    return path.rpartition('.')[2].lower()


def load_table(path, file_type):
    """Read the input table into a DataFrame.

    Args:
        path: Path of the input file.
        file_type: ``"txt"``/``"csv"`` (read as tab separated text), ``"html"``
            (first ``<table>`` of the page), or anything else (fallback below).

    Returns:
        pandas.DataFrame: the loaded table.

    Raises:
        ValueError: if the fallback loader finds no ``<script id='data'>``
            element in the file.
    """
    if file_type in ('txt', 'csv'):
        # Both spellings are read with a tab separator, as the --type help states.
        return pd.read_csv(path, sep='\t')
    if file_type == 'html':
        return pd.read_html(path)[0]

    # Fallback: an html page that embeds the table as JSON inside
    # <script id="data">, with a "columns" entry (the first item of each
    # element is used as the column name) and a "data" entry (the rows).
    # NOTE(review): which tool produces this layout is not visible here - confirm.
    # ``html`` is the lxml module imported at the top of the file.
    with open(path, 'r') as handle:
        tree = html.fromstring(handle.read())
    scripts = tree.xpath("//script[@id='data']/text()")
    if not scripts:
        raise ValueError(
            "no <script id='data'> element found in %s; cannot extract a table" % path)
    # Wrap the text in StringIO: passing a literal JSON string to read_json is
    # deprecated in recent pandas releases.
    payload = pd.read_json(io.StringIO(scripts[0]), typ='series')
    column_names = [entry[0] for entry in payload['columns']]
    table = pd.DataFrame(payload['data'])
    table.columns = column_names
    return table


def insert_table(target_path, key, table_html):
    """Replace every occurrence of *key* in the target file with the table.

    The target file is rewritten in place; the table markup is surrounded by
    newlines. When *key* does not occur, a warning is printed to stderr and the
    file is left untouched.

    Args:
        target_path: Path of the html file to modify.
        key: Placeholder text to replace.
        table_html: html markup of the table.
    """
    with open(target_path, 'r') as source:
        content = source.read()
    if key not in content:
        print("warning: key-word %r not found in %s; nothing replaced" % (key, target_path),
              file=sys.stderr)
        return
    with open(target_path, 'w') as sink:
        sink.write(content.replace(key, '\n' + table_html + '\n'))


def main(argv=None):
    """Run the conversion.

    Args:
        argv: Argument list to parse; ``None`` makes argparse use ``sys.argv[1:]``.
    """
    parser = build_parser()
    options = parser.parse_args(argv)
    if not options.key:
        # str.replace('') would insert the table between every character.
        parser.error("the key-word must not be empty")
    file_type = resolve_file_type(options.input, options.type)
    table_html = load_table(options.input, file_type).to_html(index=False)
    insert_table(options.out, options.key, table_html)


if __name__ == "__main__":
    main()