-
Notifications
You must be signed in to change notification settings - Fork 27
/
build_tcpd.py
115 lines (88 loc) · 3.01 KB
/
build_tcpd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Collect and verify all time series that are not packaged in the repository.
Author: Gertjan van den Burg
License: See LICENSE file.
Copyright: 2019, The Alan Turing Institute
"""
import argparse
import platform
import os
# Directory that contains one sub-directory per dataset.
DATASET_DIR = "./datasets"

# (dataset name, collection script) pairs. Every collection script is named
# "get_<dataset name>.py", so the pairs are derived from the dataset names.
TARGETS = [
    (dataset, "get_%s.py" % dataset)
    for dataset in (
        "apple",
        "bee_waggle_6",
        "bitcoin",
        "iceland_tourism",
        "measles",
        "occupancy",
        "ratner_stock",
        "robocalls",
        "scanline_126007",
        "scanline_42049",
    )
]
def parse_args(argv=None):
    """Parse the command line arguments of this script.

    Parameters:
        argv: optional list of argument strings to parse. When left as
            ``None``, argparse reads the arguments from ``sys.argv[1:]``,
            which is what a plain ``parse_args()`` call has always done.

    Returns:
        The ``argparse.Namespace`` with the attributes ``verbose`` (bool),
        ``output_dir`` (str or None) and ``action`` ("collect" or "clean",
        defaulting to "collect" when no action is given).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v", "--verbose", help="Enable logging", action="store_true"
    )
    parser.add_argument(
        "-o", "--output-dir", help="Output directory to store all time series"
    )
    parser.add_argument(
        "action",
        help="Action to perform",
        choices=["collect", "clean"],
        default="collect",
        # "?" makes the positional optional so that the default applies.
        nargs="?",
    )
    return parser.parse_args(argv)
def load_dataset_script(module_name, path):
    """Load the dataset collection script as a module.

    This is not a *super* clean way to do this, but it maintains the
    modularity of the dataset, where each dataset can be downloaded
    individually as well as through this script.

    Parameters:
        module_name: name to give to the loaded module.
        path: path of the Python source file to load.

    Returns:
        The module object obtained by executing the file at ``path``.

    Raises:
        ImportError: if no import spec or loader can be created for ``path``
            (Python >= 3.5 code path).
    """
    import sys

    if sys.version_info[0] == 2:
        import imp

        return imp.load_source(module_name, path)

    if sys.version_info[:2] in ((3, 3), (3, 4)):
        from importlib.machinery import SourceFileLoader

        return SourceFileLoader(module_name, path).load_module()

    import importlib.util

    spec = importlib.util.spec_from_file_location(module_name, path)
    if spec is None or spec.loader is None:
        # Fail with a clear error instead of an AttributeError on None.
        raise ImportError(
            "Cannot load module %r from path: %s" % (module_name, path)
        )
    module = importlib.util.module_from_spec(spec)
    # Register the module before executing it, as the importlib
    # documentation's recipe for importing a source file directly does, so
    # that code which looks the module up by name in sys.modules can find it.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a partially initialised module behind.
        sys.modules.pop(module_name, None)
        raise
    return module
def run_dataset_func(name, script, funcname):
    """Call one function of a dataset's collection script.

    The script is looked up as ``DATASET_DIR/<name>/<script>``, loaded as the
    module ``tcpd.<name>``, and its attribute ``funcname`` is called with
    ``output_dir`` set to the dataset's directory. Nothing is returned.
    """
    dataset_dir = os.path.join(DATASET_DIR, name)
    dataset_module = load_dataset_script(
        "tcpd.%s" % name, os.path.join(dataset_dir, script)
    )
    getattr(dataset_module, funcname)(output_dir=dataset_dir)
def collect_dataset(name, script):
    """Run the ``collect`` function of the given dataset's script."""
    return run_dataset_func(name, script, funcname="collect")
def clean_dataset(name, script):
    """Run the ``clean`` function of the given dataset's script."""
    return run_dataset_func(name, script, funcname="clean")
def main():
    """Run the action chosen on the command line for every dataset.

    Progress messages are printed only when the verbose flag is set.
    """
    args = parse_args()
    # NOTE(review): args.output_dir is parsed but never read in this function.

    def log(*a, **kw):
        # Stay silent unless --verbose was given.
        if args.verbose:
            print(*a, **kw)

    actions = {"collect": collect_dataset, "clean": clean_dataset}
    func = actions.get(args.action)
    if func is None:
        raise ValueError("Unknown action: %s" % args.action)

    for name, script in TARGETS:
        # No newline here: the "ok" below finishes the same output line.
        log(
            "Running %s action for dataset: %s ... " % (args.action, name),
            end="",
            flush=True,
        )
        func(name, script)
        log("ok", flush=True)


if __name__ == "__main__":
    main()