-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
58 lines (55 loc) · 2.77 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import argparse
from trees2vectors import trees2vectors
from sparse_transform import sparse_transform
from collect_statistic import collect_statistic
from normalize import normalize
from vectors2matrix import vectors2matrix
from matrix2csv import matrix2csv
# Options each stage reads from the command line, keyed by the value accepted
# by --stage. Used both for the --stage choices and to report missing options
# up front instead of failing on `None[0]` later.
STAGE_ARGUMENTS = {
    'trees2vectors': ('input_folder', 'output_folder', 'features_file'),
    'sparse_transformation': ('input_folder', 'output_folder', 'all_features_file'),
    'normalize': ('input_folder', 'output_folder', 'all_features_file'),
    'collect_statistic': ('output_folder', 'all_features_file'),
    'vectors2matrix': ('input_folder', 'output_file'),
    'matrix2csv': ('input_file', 'output_file'),
}


def build_parser():
    """Build the argument parser shared by all stages.

    Path-like options keep ``nargs=1``, so their parsed values are
    one-element lists (or ``None`` when the option is omitted).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_folder', '-i', nargs=1, type=str, help='folder with trees')
    parser.add_argument('--output_folder', '-o', nargs=1, type=str,
                        help='output folder with files, which will contain tree_features and feature values as JSON')
    parser.add_argument('--features_file', '-f', nargs=1, type=str, help='path to file with features')
    parser.add_argument('--sparse_format', default='list', choices=['list', 'map'])
    parser.add_argument('--all_features_file', nargs=1, type=str,
                        help='path to all_features file (in JSON) generated by trees2vectors stage')
    parser.add_argument('--output_file', nargs=1, type=str,
                        help='path to output matrix file (dataset.json (vectors2matrix stage)'
                             ', dataset.csv (matrix2csv stage), for example)')
    parser.add_argument('--input_file', nargs=1, type=str,
                        help='path to input matrix file (dataset.json, for example)')
    # type=int keeps user-supplied values consistent with the integer default;
    # without it, `-n 2 3` would yield ['2', '3'].
    parser.add_argument('-n', nargs='*', type=int, default=[1, 2, 3],
                        help='n for collect n-grams statistic')
    # Required: without a stage there is nothing to run, and the script would
    # otherwise exit silently.
    parser.add_argument('--stage', '-s', required=True, choices=list(STAGE_ARGUMENTS),
                        help='stage to run')
    return parser


def main(argv=None):
    """Parse the command line and run the selected stage.

    :param argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    :raises SystemExit: via ``parser.error`` (exit status 2) when the command
        line is invalid or an option needed by the chosen stage is missing.
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    stage = args.stage

    # Validate before dispatching so a forgotten option produces a usage
    # message rather than a TypeError from indexing None.
    missing = ['--' + name for name in STAGE_ARGUMENTS[stage] if getattr(args, name) is None]
    if missing:
        parser.error("stage '{}' requires {}".format(stage, ', '.join(missing)))

    if stage == 'trees2vectors':
        trees2vectors(args.input_folder[0], args.output_folder[0], args.features_file[0])
    elif stage == 'sparse_transformation':
        sparse_transform(args.input_folder[0], args.output_folder[0],
                         args.all_features_file[0], args.sparse_format)
    elif stage == 'normalize':
        normalize(args.input_folder[0], args.output_folder[0], args.all_features_file[0])
    elif stage == 'collect_statistic':
        collect_statistic(args.output_folder[0], args.all_features_file[0], args.n)
    elif stage == 'vectors2matrix':
        vectors2matrix(args.input_folder[0], args.output_file[0])
    elif stage == 'matrix2csv':
        matrix2csv(args.input_file[0], args.output_file[0])


if __name__ == '__main__':
    main()