Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Larger file storage #35

Merged
merged 13 commits into from
Mar 27, 2017
Merged
3 changes: 1 addition & 2 deletions adaptivemd/analysis/pyemma/_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ def remote_analysis(

import pyemma
import mdtraj as md
from adaptivemd import Model

pdb = md.load(topfile)
topology = pdb.topology
Expand Down Expand Up @@ -140,4 +139,4 @@ def apply_feat_part(featurizer, parts):
}
}

return Model(data)
return data
24 changes: 20 additions & 4 deletions adaptivemd/analysis/pyemma/emma.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import os

from adaptivemd import PythonTask
from adaptivemd.analysis import Analysis
from adaptivemd.mongodb import DataDict
from adaptivemd.model import Model

from _remote import remote_analysis


Expand Down Expand Up @@ -67,13 +72,16 @@ def to_dict(self):
return dct

@staticmethod
def then_func(project, task, model, inputs):
def then_func(project, task, data, inputs):
# add the input arguments for later reference
model.data['input']['trajectories'] = inputs['trajectories']
model.data['input']['pdb'] = inputs['topfile']
data['input']['trajectories'] = inputs['trajectories']
data['input']['pdb'] = inputs['topfile']

# from the task we get the used generator and then its outtype
model.data['input']['modeller'] = task.generator
data['input']['modeller'] = task.generator

# wrapping in a DataDict allows storage of large files!
model = Model(DataDict(data))
project.models.add(model)

def execute(
Expand Down Expand Up @@ -113,6 +121,14 @@ def execute(
# this will fire the then_func from the generator once finished
t = PythonTask(self)

# we handle the returned output ourselves -> its stored as a model
# so do not store the returned JSON also
t.store_output = False

# copy the output.json to a models/model.{uuid}.json file
t.backup_output_json(
os.path.join('project:///models', 'model.' + hex(t.__uuid__) + '.json'))

input_pdb = t.link(self['pdb_file_stage'], 'input.pdb')

trajs = list(trajectories)
Expand Down
23 changes: 19 additions & 4 deletions adaptivemd/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,8 @@ class File(Location):
_find_by = ['created', 'task']

created = SyncVariable('created', lambda x: x is not None and x < 0)
_file = SyncVariable('_file', lambda x: not bool(x))
task = SyncVariable('task', lambda x: not bool(x))
_file = SyncVariable('_file', lambda x: x is not None)
task = SyncVariable('task', lambda x: x is not None)

def __init__(self, location):
super(File, self).__init__(location)
Expand Down Expand Up @@ -387,6 +387,10 @@ def from_dict(cls, dct):
if '_file_' in dct:
obj._file = base64.b64decode(dct['_file_'])

# print 'set', len(obj._file), obj.__uuid__

# print len(obj._file)

return obj

def get_file(self):
Expand Down Expand Up @@ -446,6 +450,17 @@ def get_file(self):
return None

def load(self, scheduler=None):
    """Load this file's JSON content into the in-memory cache.

    Delegates path resolution and parsing to ``get``; the result is
    cached in ``self._data`` so repeated calls do not re-read the file.

    Parameters
    ----------
    scheduler : Scheduler or None
        passed through to ``get`` to resolve non-local paths
        # NOTE(review): exact role of `scheduler` depends on `get`'s
        # path handling, which is only partially visible here — confirm

    Returns
    -------
    self
        returned for call chaining (e.g. ``f.load().data``)
    """
    if self._data is None:
        s = self.get(scheduler)
        # only cache a successful read; a failed lookup leaves
        # self._data as None so a later call can retry
        if s is not None:
            self._data = s

    return self

def get(self, scheduler=None):
if self._data is not None:
return self._data

path = None

if self.drive == 'file':
Expand All @@ -456,9 +471,9 @@ def load(self, scheduler=None):

if path:
with open(path, 'r') as f:
self._data = _json_file_simplifier.from_json(f.read())
return _json_file_simplifier.from_json(f.read())

return self
return None

@property
def exists(self):
Expand Down
9 changes: 9 additions & 0 deletions adaptivemd/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ class Model(StorableMixin):
"""
A wrapper to hold model data

This uses a special grid storage to save models larger than 16MB

Attributes
----------
data : dict of str : anything
Expand All @@ -13,3 +15,10 @@ class Model(StorableMixin):
def __init__(self, data):
    """Wrap model output in a storable object.

    Parameters
    ----------
    data : dict of str : anything
        the model payload; presumably may be a ``DataDict`` when the
        payload exceeds the 16MB document limit — TODO confirm against
        the grid-storage note on the class
    """
    super(Model, self).__init__()
    self.data = data

def __getitem__(self, item):
    """Dict-style access: ``model[key]`` delegates to the wrapped data.

    Raises whatever ``self.data`` raises for a missing key
    (``KeyError`` for a plain dict).
    """
    return self.data[item]

def __getattr__(self, item):
    """Attribute-style access: ``model.key`` delegates to the wrapped data.

    ``__getattr__`` is only invoked when normal attribute lookup fails.
    Unlike the original, a key that is absent from ``data`` raises
    ``AttributeError`` instead of silently returning ``None`` — the
    implicit ``None`` broke ``hasattr``/``getattr(obj, name, default)``
    semantics for missing names.

    Raises
    ------
    AttributeError
        if ``item`` is not a key of the wrapped data, or if ``data``
        itself is not yet set (e.g. during unpickling), which would
        otherwise recurse infinitely through this method.
    """
    # bypass normal lookup so a missing `data` attribute cannot
    # re-enter __getattr__ and recurse forever
    try:
        data = object.__getattribute__(self, 'data')
    except AttributeError:
        raise AttributeError(item)
    if item in data:
        return data[item]
    raise AttributeError(item)
2 changes: 2 additions & 0 deletions adaptivemd/mongodb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@
from object import ObjectStore

from proxy import DelayedLoader, lazy_loading_attributes, LoaderProxy

from file import FileStore, DataDict
35 changes: 12 additions & 23 deletions adaptivemd/mongodb/dictify.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,10 @@ def build(self, obj):
attributes = self.build(obj['_dict'])
ret = self.class_list[obj['_cls']].from_dict(attributes)
if '_obj_uuid' in obj:
# vals = {x: getattr(ret, x) for x in ret._find_by}
ret.__uuid__ = int(UUID(obj['_obj_uuid']))
# for k,v in vals.iteritems():
# setattr(ret, )

return ret

Expand Down Expand Up @@ -470,29 +473,14 @@ def to_json(self, obj, base_type=''):
simplified = self.simplify(obj, base_type)
return ujson.dumps(simplified)

# def to_json_object(self, obj):
# if hasattr(obj, 'base_cls') \
# and type(obj) is not type and type(obj) is not abc.ABCMeta:
# simplified = self.simplify_object(obj)
# else:
# simplified = self.simplify(obj)
# try:
# json_str = ujson.dumps(simplified)
# except TypeError as e:
# err = (
# 'Cannot convert object of type `%s` to json. '
# '\n__dict__: %s\n'
# '\nsimplified: %s\n'
# '\nError: %s'
# ) % (
# obj.__class__.__name__,
# obj.__dict__,
# simplified,
# str(e)
# )
# raise ValueError(err)
#
# return json_str
def to_json_object(self, obj):
    """Serialize ``obj`` to a JSON string.

    Instances of storable project classes — anything exposing a
    ``base_cls`` attribute that is a real instance (not a class or an
    ``abc.ABCMeta`` metaclass) — are simplified via
    ``self.simplify_object``; everything else goes through the generic
    ``self.simplify``.

    Returns
    -------
    str
        the JSON encoding (via ``ujson.dumps``) of the simplified form
    """
    if hasattr(obj, 'base_cls') \
            and type(obj) is not type and type(obj) is not abc.ABCMeta:
        simplified = self.simplify_object(obj)
    else:
        simplified = self.simplify(obj)

    return ujson.dumps(simplified)

def from_json(self, json_string):
simplified = ujson.loads(json_string)
Expand All @@ -507,6 +495,7 @@ def from_json(self, json_string):

def from_simple_dict(self, simplified):
obj = self.build(simplified)

obj.__uuid__ = int(UUID(simplified.get('_id')))
obj.__time__ = simplified.get('_time', 0) # use time or 0 if unset

Expand Down
Loading