-
Notifications
You must be signed in to change notification settings - Fork 13
/
simpleutils.py
85 lines (77 loc) · 2.45 KB
/
simpleutils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Utilities that do not depend on torch
import csv
import datetime
import hashlib
import json
import logging
import multiprocessing as mp
import os
import tempfile
import time
class Timing():
    """Context manager that prints the wall-clock time spent in a ``with`` block.

    The measurement uses ``time.time()`` and the report is written with
    ``print`` in the form ``<name> : <seconds> s``.

    Attributes:
        name: Label printed in front of the elapsed time.
        t: Start timestamp; set at construction and reset on ``__enter__``.
        entered: True once the object has been used in a ``with`` statement.
    """

    def __init__(self, name='run time'):
        self.name = name
        # Start the clock immediately so showRunTime() is also meaningful
        # when the object is used without a ``with`` statement.
        self.t = time.time()
        self.entered = False

    def __enter__(self):
        """Restart the clock and return this timer for ``with ... as t``."""
        self.t = time.time()
        self.entered = True
        return self

    def __exit__(self, *ignored):
        """Print the elapsed time; exceptions from the block are not suppressed."""
        self.showRunTime(self.name)

    def showRunTime(self, name=None):
        """Print the time elapsed since the clock was last (re)started.

        Args:
            name: Label to print; falls back to ``self.name`` when omitted.
        """
        if name is None:
            name = self.name
        print(name, ':', time.time() - self.t, 's')
def get_hash(s):
    """Return the hexadecimal MD5 digest of the UTF-8 encoding of ``s``."""
    encoded = s.encode('utf8')
    return hashlib.md5(encoded).hexdigest()
def read_config(path):
    """Load and return the JSON document stored at ``path``.

    The file is decoded as UTF-8 explicitly, so the result does not depend
    on the platform's locale default encoding.

    Args:
        path: Path of the JSON file to read.

    Returns:
        The decoded JSON value, as produced by ``json.load``.
    """
    with open(path, 'r', encoding='utf8') as fin:
        return json.load(fin)
def read_file_list(list_file):
    """Read a list of entries from a CSV or plain-text list file.

    For a path ending in ``.csv`` the first row is skipped as a header and
    the first column of every remaining non-empty row is returned. An empty
    CSV file yields an empty list. Any other file is read line by line, with
    the trailing newline removed from each line; blank lines are kept.

    Args:
        list_file: Path of the list file.

    Returns:
        A list of strings, one per entry.
    """
    if list_file.endswith('.csv'):
        # newline='' lets the csv module handle line endings and quoted
        # newlines itself, as the csv documentation requires.
        with open(list_file, 'r', encoding='utf8', newline='') as fin:
            reader = csv.reader(fin)
            # Skip the header row; the default avoids StopIteration on an
            # empty file.
            next(reader, None)
            # Blank lines come back as empty rows and have no first column.
            return [row[0] for row in reader if row]
    with open(list_file, 'r', encoding='utf8') as fin:
        return [line.rstrip('\n') for line in fin]
# Module-level cache for the boto3 S3 resource; filled on first use.
s3_resource = None
def get_s3_resource():
    """Return the shared boto3 S3 resource, creating it on the first call.

    boto3 is imported inside the function, so it is only needed by code
    that actually calls this. The resource is created against the
    ``https://cos.twcc.ai`` endpoint and stored in the module-level
    ``s3_resource`` variable for later calls.
    """
    import boto3
    global s3_resource
    if s3_resource is not None:
        return s3_resource
    s3_resource = boto3.resource('s3', endpoint_url='https://cos.twcc.ai')
    return s3_resource
def download_tmp_from_s3(s3url):
    """Download the object named by ``s3url`` into a new temporary file.

    The temporary file keeps the extension of ``s3url`` and is named with
    the ``pfann`` prefix. The caller is responsible for deleting it.

    Args:
        s3url: Object URL; presumably of the form ``s3://<bucket>/<key>``
            (the code skips a fixed 5-character prefix) -- confirm with
            callers.

    Returns:
        Path of the temporary file holding the downloaded object.

    Raises:
        RuntimeError: If the download fails. The temporary file is removed
            first and the original exception is attached as the cause.
    """
    s3_res = get_s3_resource()
    # The first '/' after the 5-character prefix separates bucket from key.
    d1 = s3url.find('/', 5)
    bucket_name = s3url[5:d1]
    object_name = s3url[d1+1:]
    ext = os.path.splitext(s3url)[1]
    obj = s3_res.Object(bucket_name, object_name)
    fd, tmpname = tempfile.mkstemp(suffix=ext, prefix='pfann')
    # mkstemp hands back an already-open descriptor; close it so repeated
    # downloads do not leak file descriptors. Only the path is needed.
    os.close(fd)
    try:
        obj.download_file(tmpname)
    except Exception as x:
        os.unlink(tmpname)
        raise RuntimeError('Unable to download %s: %s' % (s3url, x)) from x
    return tmpname
def init_logger(app_name):
    """Attach a UTF-8 file handler for ``app_name`` to the multiprocessing logger.

    Creates the ``logs`` directory in the current working directory if
    needed, sets the logger returned by ``multiprocessing.get_logger()`` to
    INFO, and makes it write to ``logs/<app_name>.log``. Calling this again
    for the same file does not add a second handler, so records are not
    written twice.

    Args:
        app_name: Base name (without extension) of the log file.

    Returns:
        The configured multiprocessing logger.
    """
    os.makedirs('logs', exist_ok=True)
    logger = mp.get_logger()
    logger.setLevel(logging.INFO)
    # FileHandler stores its target as an absolute path in baseFilename,
    # so compare against the absolute form.
    log_path = os.path.abspath('logs/%s.log' % app_name)
    for existing in logger.handlers:
        if isinstance(existing, logging.FileHandler) and existing.baseFilename == log_path:
            return logger
    handler = logging.FileHandler(log_path, encoding="utf8")
    handler.setFormatter(logging.Formatter('[%(asctime)s] [%(processName)s/%(levelname)s] %(message)s'))
    logger.addHandler(handler)
    return logger
class MultiProcessInitLogger:
    """Callable that sets up logging under a name fixed at construction time.

    The log name is ``<app_name>-<YYYYmmdd-HHMMSS>``, using the local time
    at which the object was created. Calling the object forwards that name
    to ``init_logger``; any positional arguments are ignored.
    """

    def __init__(self, app_name):
        created_at = datetime.datetime.now()
        stamp = created_at.strftime('%Y%m%d-%H%M%S')
        self.log_name = '-'.join((app_name, stamp))

    def __call__(self, *args):
        init_logger(self.log_name)