-
Notifications
You must be signed in to change notification settings - Fork 12
/
execdb.py
86 lines (79 loc) · 3.09 KB
/
execdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from __future__ import division
from io import open
from . import bunch
# Python 3 has no basestring builtin; alias it to str so the isinstance
# checks below work on both Python 2 and Python 3.
try:
    basestring
except NameError:
    basestring = str
class ExecDB:
    """ExecDB is a flexible way of mapping from dataset ids to
    the files needed for analysing that dataset. It is based on two
    inputs:

    1. The database source contains python code that should define
       variables containing the information necessary to load and
       interpret the dataset files. The variables available to this code
       are set up by a second source - the vars setup.
    2. The vars setup contains python code that will be executed with
       the variable "id" defined. Its job is to define variables that can
       be used in the database source. The idea is that it needs to
       change much less often than the database source. It may also
       define "export", an iterable of variable names that are copied
       into the query result.

    String values defined by the database source (also inside dicts,
    lists and tuples) are expanded with str.format using all the
    variables as fields, so literal braces must be doubled.

    For example, if vars is 'a = len(id); export = ["id", "a"]' and db is
      'if a > 2: moo = id
       else: moo = id[::-1]'
    then a query with id = "hello" will result in
      {'moo': 'hello', 'id': 'hello', 'a': 5}"""

    def __init__(self, db_file=None, vars_file=None, db_data=None, vars_data=None,
            override=None, root=None):
        """Build the database from its two sources.

        db_file, db_data: where to get the database source from. Both are
            handed to read_data, which prefers db_data when it is given.
        vars_file, vars_data: the same for the vars setup, which defaults
            to an empty source.
        override: optional description handed to expand_override; the
            result is appended to the database source.
        root: optional value made available to the vars setup as the
            string variable "root", to allow relative file names.

        Raises ValueError if no database source was specified."""
        self.db_source = read_data(db_file, db_data)
        # Check before appending the override: None + str would raise a
        # confusing TypeError instead of the intended error.
        if self.db_source is None:
            raise ValueError("No database specified in ExecDB")
        if override is not None:
            self.db_source += "\n" + expand_override(override)
        self.vars_source = read_data(vars_file, vars_data, "")
        if root is not None: # Allow relative file names
            # Emit root as an escaped string literal, so that backslashes
            # and quotes in the path survive being compiled as source.
            root_text = root if isinstance(root, basestring) else str(root)
            self.vars_source = "root = %r\n" % (root_text,) + self.vars_source
        self.db_code = compile(self.db_source, "<exec_db,db_source>", "exec")
        self.vars_code = compile(self.vars_source, "<exec_db,vars_source>", "exec")

    def __getitem__(self, id):
        """Shorthand for query(id)."""
        return self.query(id)

    def query(self, id):
        """Evaluate the database for the given id.

        If id is a string, returns a bunch.Bunch holding the variables
        defined by the database source (with strings format-expanded)
        plus the variables named in "export". Exported values are copied
        as they are, without format expansion. If id is not a string it
        is treated as an iterable of ids, and a list with the result for
        each of them is returned."""
        if not isinstance(id, basestring):
            return [self.query(i) for i in id]
        globs, locs = {"id": id}, {}
        # NOTE: both sources are executed as python code, so they must
        # come from a trusted place.
        # The vars setup runs with globs as its local namespace, so
        # everything it assigns ends up in globs.
        exec(self.vars_code, {}, globs)
        # The database code sees those variables as globals, and its own
        # assignments are collected separately in locs.
        exec(self.db_code, globs, locs)
        globs.update(locs)
        locs = recursive_format(locs, globs)
        # "export" is optional: without it only the variables defined by
        # the database source are returned.
        for key in globs.get("export", ()):
            locs[key] = globs[key]
        return bunch.Bunch(locs)

    def dump(self):
        """Return the database source, including any appended override."""
        return self.db_source
def read_data(file_or_fname=None, data=None, default=None):
    """Helper function for ExecDB. Gets a string of data
    from either a file or the provided data argument.

    data is returned as it is when given. Otherwise file_or_fname is
    read: an object with a read() method is read directly (and left open
    for the caller to close), while anything else is treated as a file
    name, which is opened, read and closed. If neither data nor
    file_or_fname is given, default is returned."""
    if data is not None:
        return data
    if file_or_fname is None:
        return default
    # Test for read() rather than for a string type, so that path-like
    # objects are accepted as file names too.
    if hasattr(file_or_fname, "read"):
        return file_or_fname.read()
    with open(file_or_fname) as f:
        return f.read()
def recursive_format(data, formats):
    """Return a copy of data where every string found in it, directly or
    nested inside dicts, lists or tuples, has been expanded with
    str.format using the entries of the formats dict as fields.

    Only dict values are expanded, not dict keys. Anything that is not a
    string, dict, list or tuple is returned unchanged."""
    if isinstance(data, basestring):
        return data.format(**formats)

    def expand(item):
        # Apply the same formats at every nesting level
        return recursive_format(item, formats)

    if isinstance(data, dict):
        return {name: expand(data[name]) for name in data}
    if isinstance(data, list):
        return list(map(expand, data))
    if isinstance(data, tuple):
        return tuple(map(expand, data))
    return data
def expand_override(desc):
    """Expand an override description into python source.

    desc is a ";"-separated list of segments. A segment that starts with
    "@" names a file, whose lines are inserted in place of the segment.
    Any other segment is used as it is. Returns all the resulting lines
    joined with newlines."""
    olines = []
    for seg in desc.split(";"):
        if seg.startswith("@"):
            # Use a context manager so the file is closed right away.
            with open(seg[1:], "r") as f:
                # Drop each line's own newline, since the join below adds
                # one. Keeping both would insert blank lines, which
                # breaks backslash continuations and changes multi-line
                # strings in the file.
                olines.extend(line.rstrip("\n") for line in f)
        else:
            olines.append(seg)
    return "\n".join(olines)