-
Notifications
You must be signed in to change notification settings - Fork 0
/
common.py
362 lines (287 loc) · 8.8 KB
/
common.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
"""
Common functions/classes for dataprep.
"""
from random import choice
import numpy as np
import sys
import csv
import json
import cPickle
from StringIO import StringIO
################################################################################
# Decorators
################################################################################
def lazyprop(fn):
    """
    Use as a decorator to get lazily evaluated properties.

    The decorated method is called at most once per instance, on first
    access.  Its result is stored on the instance under the attribute
    '_lazy_<method name>' and served from there on every later access.

    Parameters
    ----------
    fn : Function
        Method taking only self.  Its return value is cached.

    Returns
    -------
    A property (getter only) wrapping fn.

    Examples
    --------
    >>> class Data(object):
    ...     @lazyprop
    ...     def table(self):
    ...         return expensive_load()
    """
    # Imported locally so this block does not rely on module-level imports.
    from functools import wraps

    attr_name = '_lazy_' + fn.__name__

    # wraps() copies fn's name and docstring onto the getter, so the
    # resulting property keeps the documentation of the decorated method.
    @wraps(fn)
    def _lazyprop(self):
        if not hasattr(self, attr_name):
            setattr(self, attr_name, fn(self))
        return getattr(self, attr_name)

    return property(_lazyprop)
################################################################################
# Wrappers for opening/closing files
################################################################################
def get_inout_files(infilename, outfilename, inmode='rb', outmode='wb'):
    """
    Open an input and an output file in one call.

    Thin convenience wrapper around get_infile and get_outfile.

    Parameters
    ----------
    infilename : String
        Name of file to read.  If None, we will read from stdin
    outfilename : String
        Name of file to write.  If None, we will write to stdout
    inmode : String
        Mode to open the input file in
    outmode : String
        Mode to open the output file in

    Returns
    -------
    The tuple (infile, outfile)

    Examples
    --------
    >>> infile, outfile = get_inout_files(infilename, outfilename)
    >>> myfunction(infile, outfile,...)
    >>> close_files(infile, outfile)
    """
    # The input is opened first, then the output, as a single tuple.
    return (
        get_infile(infilename, inmode=inmode),
        get_outfile(outfilename, outmode=outmode),
    )
def close_files(infile, outfile):
    """
    Close a pair of files obtained from get_inout_files.

    Delegates to close_infile and close_outfile, which leave sys.stdin and
    sys.stdout open.

    Parameters
    ----------
    infile : Open file buffer
    outfile : Open file buffer

    Examples
    --------
    >>> infile, outfile = get_inout_files(infilename, outfilename)
    >>> myfunction(infile, outfile,...)
    >>> close_files(infile, outfile)
    """
    # Input is closed before output.
    closers = ((close_infile, infile), (close_outfile, outfile))
    for closer, handle in closers:
        closer(handle)
def close_infile(infile):
    """
    Closes infile if and only if it is not sys.stdin.  Use with get_infile.

    Parameters
    ----------
    infile : Open file buffer

    Examples
    --------
    >>> infile = get_infile(infilename)
    >>> myfunction(infile,...)
    >>> close_infile(infile)
    """
    # Identity check: we only care whether this *is* the stdin object, not
    # whether it happens to compare equal to it.
    if infile is not sys.stdin:
        infile.close()
def close_outfile(outfile):
    """
    Closes outfile if and only if it is not sys.stdout.  Use with get_outfile.

    Parameters
    ----------
    outfile : Open file buffer

    Examples
    --------
    >>> outfile = get_outfile(outfilename)
    >>> myfunction(outfile,...)
    >>> close_outfile(outfile)
    """
    # Identity check: we only care whether this *is* the stdout object, not
    # whether it happens to compare equal to it.
    if outfile is not sys.stdout:
        outfile.close()
def get_infile(infilename, inmode='rb'):
    """
    Return an open file to read from.

    Parameters
    ----------
    infilename : String
        Name of file to read.  If None (or any other false value, such as
        the empty string), sys.stdin is returned instead
    inmode : String
        Mode to open the file in

    Returns
    -------
    infile

    Examples
    --------
    >>> infile = get_infile(infilename)
    >>> myfunction(infile,...)
    >>> close_infile(infile)
    """
    # No usable name given: fall back to standard input.
    if not infilename:
        return sys.stdin
    return open(infilename, inmode)
def get_outfile(outfilename, outmode='wb', default=sys.stdout):
    """
    Open outfilename in outmode.

    Parameters
    ----------
    outfilename : String
        Name of file to open and return.
        If None, return the kwarg 'default'
    outmode : String
        Mode to open file in
    default : File buffer
        The value to return if outfilename is None.
        NOTE: the default value is the sys.stdout object that existed when
        this function was defined; a later reassignment of sys.stdout is
        not picked up.

    Returns
    -------
    outfile

    Raises
    ------
    ValueError
        If outfilename is neither a str nor None.

    Examples
    --------
    >>> outfile = get_outfile(outfilename)
    >>> myfunction(outfile,...)
    >>> close_outfile(outfile)
    """
    if outfilename is None:
        return default
    if isinstance(outfilename, str):
        return open(outfilename, outmode)
    # Report the type's name (the message promises a type), and avoid
    # formatting the raw value, which itself fails for tuple arguments.
    raise ValueError(
        "Argument outfilename is of type %s. Not handled."
        % type(outfilename).__name__)
def openfile_wrap(filename, mode):
    """
    If filename is a string, returns an opened version of filename.
    If filename is a file buffer, then passthrough.

    Parameters
    ----------
    filename : String or file buffer
        Any object with a 'read' or 'write' attribute is treated as an
        already open file buffer.
    mode : String
        mode to open the file in.  Ignored when filename is a file buffer.

    Returns
    -------
    ofile : Opened file buffer
    was_path : Boolean
        If True, then filename was a string (and thus was opened here, and so
        you better remember to close it elsewhere)

    Raises
    ------
    Exception
        If filename is neither a str nor a file-like object.

    Examples
    --------
    >>> infile, was_path = openfile_wrap(infilename, 'r')
    >>> myfunction(infile,...)
    >>> if was_path:
    >>>     infile.close()
    """
    if isinstance(filename, str):
        return open(filename, mode), True
    # Duck-typed check instead of testing for specific classes, so every
    # file-like object (real files, StringIO variants, ...) passes through.
    if hasattr(filename, 'read') or hasattr(filename, 'write'):
        return filename, False
    # The one-element tuple keeps the formatting safe for tuple arguments.
    raise Exception("Could not work with %s" % (filename,))
################################################################################
# Functions to read special file formats
################################################################################
def get_list_from_filerows(infile):
    """
    Returns a list generated from rows of a file.

    Parameters
    ----------
    infile : File buffer or path
        Lines starting with # are comments
        Blank lines and leading/trailing whitespace are ignored
        Other lines will be converted to a string and appended to a
        list.

    Returns
    -------
    List of strings, one per non-blank, non-comment line, in file order.
    """
    f, was_path = openfile_wrap(infile, 'r')
    try:
        # strip() already removes the trailing newline, so no further
        # trimming is needed before the line is stored.
        stripped = (line.strip() for line in f)
        return [line for line in stripped
                if line and not line.startswith('#')]
    finally:
        # Only close what we opened ourselves, and do so even if reading
        # fails part way through.
        if was_path:
            f.close()
def write_list_to_filerows(outfile, mylist):
    """
    The inverse of get_list_from_filerows.

    Writes str(item) followed by a newline for every item of mylist.

    Parameters
    ----------
    outfile : File buffer or path
        Where to write.  A path is opened in 'w' mode and closed again
        before returning; a file buffer is left open.
    mylist : List
    """
    f, was_path = openfile_wrap(outfile, 'w')
    try:
        for item in mylist:
            f.write(str(item) + '\n')
    finally:
        # Only close what we opened ourselves, and do so even if a write
        # fails part way through.
        if was_path:
            f.close()
def pickleme(obj, filename, protocol=2):
    """
    Save obj to disk using cPickle.

    Parameters
    ----------
    obj : Serializable Python object
    filename : String
        Name of file to store obj to
    protocol : 0, 1, or 2
        2 is fastest
    """
    # Pickle output is binary data (protocols 1 and 2 always are), so the
    # file must be opened in binary mode to be written unmodified.
    with open(filename, 'wb') as f:
        cPickle.dump(obj, f, protocol=protocol)
def unpickleme(filename):
    """
    Returns unpickled version of object.

    WARNING: unpickling can execute arbitrary code.  Only use this on files
    that come from a trusted source.

    Parameters
    ----------
    filename : String
        We will attempt to unpickle this file.

    Returns
    -------
    The object stored in filename.
    """
    # Pickle data is binary, so read the file in binary mode.
    with open(filename, 'rb') as f:
        return cPickle.load(f)
def get_structured_array(listoflists, schema, dropmissing=False):
    """
    Uses schema to convert listoflists to a structured array.

    Parameters
    ----------
    listoflists : List of lists
        One inner list per row.
    schema : List of tuples
        E.g. [(var1, type1),...,(varK, typeK)]
    dropmissing : Boolean
        If True, drop rows that contain missing values.  A value counts as
        missing when it equals the empty string ''.

    Returns
    -------
    Numpy structured array with one record per kept row.
    """
    # NumPy only reads tuples as records of a structured dtype; inner lists
    # would be taken as an extra array dimension.  Hence the conversion.
    if dropmissing:
        tuple_list = [tuple(row) for row in listoflists if '' not in row]
    else:
        tuple_list = [tuple(row) for row in listoflists]
    return np.array(tuple_list, dtype=schema)
################################################################################
# Custom Exceptions
################################################################################
class BadDataError(Exception):
    """
    Plain Exception subclass that adds no behavior of its own.

    Raise it for data problems, so that callers can tell the intended
    failure apart from some other, unrelated exception.
    """
class ConfigurationSyntaxError(Exception):
    """
    Plain Exception subclass that adds no behavior of its own.

    Used to signal syntax issues in config files.
    """
################################################################################
# Functions for printing objects
################################################################################
def printdict(d, max_print_len=None):
    """
    Print the items of a dictionary, one 'key: value' pair per line.

    Parameters
    ----------
    d : Dictionary
    max_print_len : Integer or None
        If given (and non-zero), only the first max_print_len characters of
        the formatted text are printed.
    """
    # join() builds the text in one pass instead of repeated string +=.
    s = ''.join(
        str(key) + ': ' + str(value) + '\n' for key, value in d.items())
    if max_print_len:
        s = s[:max_print_len]
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(s)
def print_dicts(dicts, prepend_str=''):
    """
    Recursively print a (possibly nested) dictionary.

    A key whose value is a dictionary is printed on its own line, followed
    by the contents of that dictionary with a longer prefix.  Every other
    value is printed as 'key = value' using the '%.5f' format, so it has to
    be a number.

    Parameters
    ----------
    dicts : Dictionary
        Keys are concatenated with prepend_str, so they have to be strings.
    prepend_str : String
        Prefix printed in front of every line at this nesting level.
    """
    for key, value in dicts.items():
        if isinstance(value, dict):
            # Single-argument print(...) gives the same output on
            # Python 2 and 3.
            print(prepend_str + key)
            print_dicts(value, prepend_str + ' ')
        else:
            print("%s%s = %.5f" % (prepend_str, key, value))