-
Notifications
You must be signed in to change notification settings - Fork 187
/
datastructures.py
211 lines (165 loc) · 8.48 KB
/
datastructures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved. #
# This file is part of the AiiDA code. #
# #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
"""Module to define commonly used data structures."""
from enum import Enum, IntEnum
from .extendeddicts import DefaultFieldsAttributeDict
# Names exported by a star-import of this module.
# NOTE(review): `LazyStore` is defined in this file but is not listed here - confirm that is intentional.
__all__ = ('CalcJobState', 'CalcInfo', 'CodeInfo', 'CodeRunMode')
class CalcJobState(Enum):
    """Sub state of a CalcJobNode while its Process is in an active state (i.e. Running or Waiting).

    The value of every member is the lowercase string form of its name.
    """

    UPLOADING = 'uploading'
    SUBMITTING = 'submitting'
    WITHSCHEDULER = 'withscheduler'
    RETRIEVING = 'retrieving'
    PARSING = 'parsing'
class CalcInfo(DefaultFieldsAttributeDict):
    """Attribute dictionary holding the data returned by the calculation plugin, to be passed to the ExecManager.

    In the following descriptions all paths have to be considered relative.

    * ``retrieve_list``: a list of strings or tuples indicating the files that are to be retrieved from the
      remote after the calculation has finished, and stored in the repository in a ``FolderData``.

      If an entry is just a string, it is assumed to be the filepath on the remote, and it will be copied to
      ``'.'`` of the repository with the name ``os.path.split(item)[1]``.

      If an entry is a tuple, it is expected to have the format::

          ('remotepath', 'localpath', depth)

      If ``'remotepath'`` is a file or folder, it will be copied in the repository to ``'localpath'``.
      However, if ``'remotepath'`` contains file patterns with wildcards, ``'localpath'`` should be set to
      ``'.'`` and ``depth`` should be an integer that decides the local name: ``'remotepath'`` is split on file
      separators and the local filename is determined by joining the last N elements, where N is given by
      ``depth``.

      Example::

          ('some/remote/path/files/pattern*[0-9].xml', '.', 2)

      will result in all files that match the pattern being copied to the local repository with path
      ``'files/pattern*[0-9].xml'``.

    * ``retrieve_temporary_list``: a list of strings or tuples indicating the files that will be retrieved and
      stored temporarily in a ``FolderData`` that is available only during the parsing call. The format of the
      list is the same as that of ``retrieve_list``.

    * ``retrieve_singlefile_list``: a list of tuples with the format::

          ('linkname_from calc to singlefile', 'subclass of singlefile', 'filename')

      Each tuple represents a file that will be retrieved from the cluster and saved in ``SinglefileData`` nodes.

      .. deprecated:: 1.0.0
          Will be removed in `v2.0.0`, use `retrieve_temporary_list` instead.

    * ``local_copy_list``: a list of tuples with format ``('node_uuid', 'filename', 'relativedestpath')``
    * ``remote_copy_list``: a list of tuples with format
      ``('remotemachinename', 'remoteabspath', 'relativedestpath')``
    * ``remote_symlink_list``: a list of tuples with format
      ``('remotemachinename', 'remoteabspath', 'relativedestpath')``
    * ``provenance_exclude_list``: a sequence of relative paths of files in the sandbox folder of a `CalcJob`
      instance that should not be stored permanently in the repository folder of the corresponding
      `CalcJobNode` that will be created, but should only be copied to the remote working directory on the
      target computer. This is useful for input files that have to be present in the working directory but
      should not also end up in the repository, for example because they contain proprietary information, or
      because they are big and their content is already indirectly present in the repository through one of
      the data nodes passed as input to the calculation.
    * ``codes_info``: a list of dictionaries used to pass the info of the execution of a code
    * ``codes_run_mode``: a string used to specify the order in which multi codes can be executed
    """

    _default_fields = (
        # Job and scheduler settings
        'job_environment',
        'email',
        'email_on_started',
        'email_on_terminated',
        'uuid',
        'prepend_text',
        'append_text',
        'num_machines',
        'num_mpiprocs_per_machine',
        'priority',
        'max_wallclock_seconds',
        'max_memory_kb',
        'rerunnable',
        # File retrieval and copy instructions
        'retrieve_list',
        'retrieve_temporary_list',
        'retrieve_singlefile_list',  # Deprecated as of 1.0.0, use `retrieve_temporary_list` instead
        'local_copy_list',
        'remote_copy_list',
        'remote_symlink_list',
        'provenance_exclude_list',
        # Code execution
        'codes_info',
        'codes_run_mode',
    )
class CodeInfo(DefaultFieldsAttributeDict):
    """Attribute dictionary with the information needed to execute a code.

    Possible attributes are:

    * ``cmdline_params``: a list of strings, containing parameters to be written on the command line right
      after the call to the code, as for example::

          code.x cmdline_params[0] cmdline_params[1] ... < stdin > stdout

    * ``stdin_name``: (optional) the name of the standard input file. Note, it is only possible to use the
      stdin with the syntax::

          code.x < stdin_name

      If no stdin_name is specified, the string "< stdin_name" will not be passed to the code.
      Note: it is not possible to substitute/remove the '<' if stdin_name is specified; if that is needed,
      avoid stdin_name and use the cmdline_params instead to specify a suitable syntax.

    * ``stdout_name``: (optional) the name of the standard output file. Note, it is only possible to pass
      output to stdout_name with the syntax::

          code.x ... > stdout_name

      If no stdout_name is specified, the string "> stdout_name" will not be passed to the code.
      Note: it is not possible to substitute/remove the '>' if stdout_name is specified; if that is needed,
      avoid stdout_name and use the cmdline_params instead to specify a suitable syntax.

    * ``stderr_name``: (optional) a string, the name of the error file of the code.
    * ``join_files``: (optional) if True, redirects the error to the output file.
      If join_files=True, the code will be called as::

          code.x ... > stdout_name 2>&1

      otherwise, if join_files=False and stderr is passed::

          code.x ... > stdout_name 2> stderr_name

    * ``withmpi``: if True, executes the code with mpirun (or another MPI installed on the remote computer)
    * ``code_uuid``: the uuid of the code associated to the CodeInfo
    """

    _default_fields = (
        'cmdline_params',  # a list of strings
        'stdin_name',
        'stdout_name',
        'stderr_name',
        'join_files',
        'withmpi',
        'code_uuid',
    )
class CodeRunMode(IntEnum):
    """Enum indicating how the codes of a calculation should be run.

    With the ``SERIAL`` option, the codes are executed sequentially, for example::

        code1.x
        code2.x

    With ``PARALLEL``, the codes of a given calculation are run in parallel by running them in the background::

        code1.x &
        code2.x &
    """

    SERIAL = 0
    PARALLEL = 1
class LazyStore:
    """Key-based container that creates missing objects on demand.

    Objects are looked up by key; if no object is stored under the requested key when it is retrieved, it will
    be created using the factory provided by the caller and kept for subsequent lookups.
    """

    def __init__(self):
        # Backing mapping from key to the object created for it.
        self._store = {}

    def get(self, key, factory):
        """Return the object stored under `key`, creating it with `factory` if it does not exist yet.

        :param key: the key of the object to get
        :param factory: zero-argument callable used to create the object if necessary
        :return: the object
        """
        if key in self._store:
            return self._store[key]

        # Nothing stored yet: build the object once and remember it for later calls.
        created = factory()
        self._store[key] = created
        return created

    def pop(self, key):
        """Remove the object stored under the given key and return it.

        :param key: the object key
        :return: the object that was popped
        :raises KeyError: if nothing is stored under `key`
        """
        popped = self._store.pop(key)
        return popped