-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_me.py
337 lines (286 loc) · 11.6 KB
/
run_me.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
from pathlib import Path
import logging
import re
import webbrowser
from time import sleep
import docker
from docker.errors import ImageNotFound
from requests.exceptions import ConnectionError
from download_data import download
# TODO: fix docker.errors.APIError: 409 Client Error: Conflict ("Conflict. The container name "/mastering-pyspark-ml"
# is already in use by container "5e645605bbef237ecabe3366dd512de182ac76505e3ff1cff16fb3d5af748cdf". You have to
# remove (or rename) that container to be able to reuse that name.")
# Configure the root logger once for the whole script: DEBUG verbosity and a
# fixed-width "LEVEL  logger-name  message" layout (%-style format fields).
logging.basicConfig(
    format="%(levelname)-7s %(name)-19s %(message)s",
    style="%",
    level=logging.DEBUG,
)

# Module-wide logger used by the helpers and the Course class below.
logger = logging.getLogger("CourseHandler")
def re_search(pattern, text, plural=False):
"""Regex helper to find strings in a body of text"""
match = [m.group(1) for m in re.finditer(pattern, text)]
if plural:
return match
else:
if match:
return match[0]
def look_ahead(iter_item):
    """
    Iterate over ``iter_item`` while also reporting whether the current value is the
    last one.

    Passes through all values from the given iterable, augmented with an index number
    (i) and a boolean that tells if the current value is the last value:
        - (False) if there are more values after the current one
        - (True) if it is the last value

    An empty iterable yields nothing.

    :param iter_item: iterable item
    :return: yields index, value, last_flag

    Code example:
    >>> for i, v, last in look_ahead(["this","is","a","test"]):
    ...     print(i, v, last)
    ...     if last:
    ...         print("The End")
    0 this False
    1 is False
    2 a False
    3 test True
    The End

    Inspired by: (http://stackoverflow.com/questions/1630320/what-is-the-pythonic-way-to
    -detect-the-last-element-in-a-python-for-loop)
    """
    iterator = iter(iter_item)

    # Pull the first value up front; reporting always lags one item behind so that
    # we know whether something follows. A StopIteration escaping a generator body
    # is converted into RuntimeError (PEP 479), so an empty input is handled here.
    try:
        pending = next(iterator)
    except StopIteration:
        return

    index = 0
    # Run the iterator to exhaustion. As long as the loop is running we have not
    # reached the last value yet, so the *previous* value is reported as non-final.
    for index, value in enumerate(iterator, start=1):
        yield index - 1, pending, False
        pending = value

    # The loop has finished, so whatever is pending is the last value
    yield index, pending, True
class Course:
    """Class that constructs the environment for this course.

    The class attributes are parsed from the ``Dockerfile`` that sits next to this
    script while the class body is executed, so they can be read without creating
    an instance. Creating an instance connects to Docker, makes sure the course
    image is available (building it when it is not) and makes sure the course
    container is running.
    """

    author = "Danny Meijer"
    copyright = 2020
    find_me_on_the_web = {
        "LinkedIn": "https://www.linkedin.com/in/dannydatascientist/",
        "GitHub": "https://github.com/dannymeijer",
        "Email": "[email protected]",
    }

    @staticmethod
    def close(self=None):
        """Print a goodbye message.

        :param self: ignored; kept (now optional) so existing calls that pass a
            positional argument keep working while ``Course.close()`` and
            ``course.close()`` no longer fail on a missing argument
        """
        print("Thank you!")

    # Directory that contains this script (and the Dockerfile)
    repo_path = Path(__file__).resolve().parent
    # read_text() closes the file handle again, unlike a bare open().read()
    Dockerfile = (repo_path / "Dockerfile").read_text()

    # Values parsed from the Dockerfile. Patterns with regex escapes are raw
    # strings so that "\w" / "\s" are not treated as (invalid) string escapes.
    # The hard-coded author above is the fallback when the ARG is missing.
    author = re_search('ARG AUTHOR="(.+)"', Dockerfile) or author
    version = re_search(r'ARG VERSION="([\w-]+)"', Dockerfile)
    container_name = re_search(r'ARG CONTAINER_NAME="([\w-]+)"', Dockerfile)
    full_name = re_search(r'ARG COURSE_NAME="([\w\s]+)"', Dockerfile)
    short_name = full_name.lower().replace(" ", "_")
    repo_name = Path(repo_path).name
    home = Path(re_search('ENV HOME="(.+)"', Dockerfile))
    ports = re_search("EXPOSE ([0-9/tcudp]+)", Dockerfile, plural=True)
    tag = "{tag}:{target}".format(tag=container_name, target=version)

    # Docker handles, filled in by __init__ through _client/_image/_container
    client = None
    image = None
    container = None

    def __init__(self):
        """Connect to Docker, then ensure the image and the container are there."""
        self._client()
        self._image()
        self._container()

    @property
    def volumes(self):
        """volumes property dynamically builds the volume configuration: the
        repository's data-sets and work folders plus every folder matching
        ``Section*`` in the repository

        :return: dictionary to configure volumes mounted inside the container
        """
        return {
            # Mount data-sets
            self.repo_path / "data-sets": {
                "bind": (self.home / "data-sets").as_posix(),
                "mode": "rw",
            },
            # Mount work folder
            self.repo_path / "work": {
                "bind": (self.home / "work").as_posix(),
                "mode": "rw",
            },
            # Mount each section
            **{
                section_path: {
                    "bind": (self.home / self.short_name / section_path.name).as_posix(),
                    "mode": "rw",
                }
                for section_path in self.repo_path.glob("Section*")
            },
        }

    def _client(self):
        """Initiates a Docker client (once) and checks that the daemon responds

        :returns: DockerClient
        :raises RuntimeError: when the Docker daemon cannot be reached
        """
        logger.info("Connecting to Docker API")
        if self.client:
            return self.client

        try:
            self.client = docker.from_env()
            # Creating the client does not necessarily contact the daemon;
            # ping() makes an unreachable Docker fail here, with a clear message
            self.client.ping()
        except (ConnectionError, docker.errors.DockerException) as err:
            # Do not keep a client around that could not reach the daemon
            self.client = None
            raise RuntimeError(
                "Something went wrong. Unable to connect to Docker. Please verify that "
                "the Docker Desktop Client is running"
            ) from err
        return self.client

    def get_image(self, tag):
        """Looks up a Docker image and stores it on ``self.image``

        :param tag: image name (``name:version``) to look up
        :return: docker image object, or None when the image does not exist
        """
        try:
            self.image = self.client.images.get(tag)
        except docker.errors.ImageNotFound:
            self.image = None
        logger.debug("image value: %s", self.image)
        return self.image

    def remove_image(self, tag):
        """Removes a Docker image; a missing image is only logged

        FOR DEBUG AND DEVELOPMENT PURPOSES ONLY

        :param tag: image name (``name:version``) to remove
        :return: None
        """
        try:
            self.client.images.remove(tag)
            logger.info("Successfully removed image, %s", tag)
        except docker.errors.ImageNotFound:
            logger.warning("Unable to remove image, ImageNotFound %s", tag)

    @staticmethod
    def _progress_message(progress_log):
        """Summarises the per-layer progress records into one log line

        :param progress_log: dict of layer id -> {"status", "total", "current"}
        :return: formatted progress message
        """
        records = list(progress_log.values())
        downloading = sum(1 for rec in records if rec.get("status") == "Downloading")
        extracting = sum(1 for rec in records if rec.get("status") == "Extracting")
        current = float(sum(rec["current"] for rec in records))
        total = float(sum(rec["total"] for rec in records))
        # Guard against a stream that reports zero-sized totals
        percentage = (current / total) * 100 if total else 0.0
        return (
            " {total_progress} | {total} Packages, {downloading} Downloading, "
            "{extracting} Extracted".format(
                downloading=downloading,
                extracting=extracting,
                total_progress="{0:.2f}%".format(percentage),
                total=len(records),
            )
        )

    def build_image(self):
        """Equivalent of `docker build --rm -f "Dockerfile" -t $COURSE_NAME .`

        :return: None
        :raises RuntimeError: when the build stream reports an error, or when the
            image cannot be found after the build
        """
        logger.info("Building Docker image")
        build_logger = logging.getLogger("docker.api.build")
        progress_log = {}

        # Using low level Docker API to be able to report status
        # Builds can take a while!
        build_logger.info("Initiating (this might take a few moments)")
        build_stream = self.client.api.build(
            tag=self.tag,
            path=str(self.repo_path),
            dockerfile="Dockerfile",
            rm=True,
            decode=True,
        )
        for i, log, last in look_ahead(build_stream):
            if i == 0:
                build_logger.info("Build process started...")

            # The low level API reports failures inside the stream rather than
            # raising, so surface them instead of claiming success below
            if "error" in log:
                raise RuntimeError(
                    "Docker image build failed: {}".format(log["error"])
                )

            # Retrieve status from stream
            status = log.get("status")
            progress_detail = log.get("progressDetail")
            i_d = log.get("id", None)
            if i_d and status in ("Downloading", "Extracting") and progress_detail:
                progress_log[i_d] = {
                    "status": status,
                    "total": progress_detail.get("total", 1),
                    "current": progress_detail.get("current", 0),
                }

            # Report progress every few lines from the stream
            if (last or (i + 1) % 100 == 0) and progress_log:
                build_logger.info(self._progress_message(progress_log))

        self.image = self.get_image(self.tag)
        if self.image is None:
            raise RuntimeError(
                "Docker image {} is not available after the build".format(self.tag)
            )
        build_logger.info("Image was built successfully")
        logger.info("Image ID: %s", self.image.id)

    def _image(self):
        """Retrieves docker image, building it first when it is not available

        :return: docker image object
        """
        logger.info("Checking if Docker image is already set-up")
        if self.get_image(self.tag):
            self.image.reload()
        else:
            logger.warning("Docker image has not been built yet")
            self.build_image()
        logger.info("Image looks good!")
        return self.image

    def get_container(self, tag):
        """Looks up a Docker container and stores it on ``self.container``

        :param tag: name (or ID) of the container to look up
        :return: docker container object, or None when no such container exists
        """
        try:
            self.container = self.client.containers.get(tag)
        except docker.errors.NotFound:
            self.container = None
        return self.container

    def run_container(self):
        """This method is responsible for running the container.

        Functionally similar to running Docker CLI command:
        ```docker run -v ${volumes} --rm -d -p ${ports} --name ${name} ${image}```

        The following things are set/handled
         - ensures the container is using the correct image version and that it is named
           correctly
         - publishes every port found in the Dockerfile on 127.0.0.1, using the same
           port number on the host
         - mounts the correct volumes inside of the container as per the volumes property
         - detach=True ensures that the Container runs in the background
         - remove=True ensures that the Container is removed once it is stopped

        :return: docker container object
        """
        # TODO: check if ports are available
        # TODO: dynamic port mapping
        port_map = {}
        for exposed in self.ports:
            # An EXPOSE entry may already carry a protocol ("8888/udp"); only
            # default to tcp when it does not, instead of producing "8888/udp/tcp"
            port, _, protocol = exposed.partition("/")
            port_map["{}/{}".format(port, protocol or "tcp")] = ("127.0.0.1", port)

        self.container = self.client.containers.run(
            ports=port_map,
            volumes=self.volumes,
            image=self.tag,
            name=self.container_name,
            detach=True,
            remove=True,
        )
        # TODO: handle docker.errors.APIError
        return self.container

    def _container(self):
        """Retrieves docker container, running a new one when there is none yet

        :return: docker container object
        """
        logger.info("Checking if course container is running already")
        # Containers are looked up by *name* (or ID). Looking one up by the image
        # tag never matches, which made every re-run try to create a second
        # container with the same name (409 Conflict).
        existing = self.get_container(self.container_name)
        if existing is None:
            logger.warning("Course's Docker container is not running yet")
            self.run_container()
        elif existing.status in ("created", "exited"):
            # A left-over container that is not running is started again rather
            # than re-created under the same name
            logger.warning(
                "Course's Docker container exists but is %s, starting it",
                existing.status,
            )
            existing.start()
            existing.reload()
        return self.container
if __name__ == "__main__":
    # Report what was parsed from the Dockerfile before doing any work
    logger.info("Welcome to '%s' by %s", Course.full_name, Course.author)
    logger.info("Course Version: %s", Course.version)
    logger.info("Course Name: %s", Course.full_name)
    logger.info("Container Name: %s", Course.container_name)
    logger.debug("Ports that will be Exposed: %s", Course.ports)
    logger.debug("Container Tag: %s", Course.tag)

    logger.info("Downloading the data")
    download()

    # Set up the course (connects to Docker, ensures image and container)
    course = Course()

    # The token is read from the Dockerfile; the pattern is a raw string so that
    # "\w" reaches the regex engine instead of being an invalid string escape
    course_url = "http://localhost:{port}/lab?token={token}".format(
        port=8888,
        token=re_search(r'ENV JUPYTER_TOKEN "([\w-]+)"', course.Dockerfile),
    )

    logger.info(
        "The Course environment is available at [%s] \nIt will automatically "
        "open in your web browser within the next 15 seconds",
        course_url,
    )
    # Pause before opening the browser
    sleep(12)
    logger.info("Enjoy the Course!")
    webbrowser.open(course_url)