-
Notifications
You must be signed in to change notification settings - Fork 5
/
Dockerfile
58 lines (46 loc) · 1.55 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Dependency-only stage: nothing is built here. The stage is named `dep-env`
# so that a later `COPY --from=dep-env` can copy files out of this image.
# NOTE(review): the image reference carries no tag, so it resolves to `latest`
# and the build is not reproducible — consider pinning a tag or digest.
FROM ukwa/webarchive-discovery AS dep-env
# Final image: start from the UKWA Hadoop 0.20 + Python 3 base image.
FROM ukwa/docker-hadoop:2.1.2
# Become root for the build steps that follow, which install software.
USER root
# System packages needed by the Python dependencies installed further down.
# libsnappy-dev (with the C++ compiler and Python headers) provides Snappy
# compression support; git is required for the `git+https` pip install; the
# Rust / OpenSSL / libffi toolchain is presumably there so Python wheels can be
# built from source — TODO confirm which dependencies still need it.
#
# --no-install-recommends keeps optional packages out of the image. Because of
# that, ca-certificates is listed explicitly (it could otherwise arrive only as
# a recommended package): HTTPS fetches and the later `update-ca-certificates`
# step depend on it. If a helper script relies on some other recommended
# package, add it to this list by name.
#
# Packages are sorted alphabetically, and the apt lists are removed in the
# same layer so they never reach the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        cargo \
        g++ \
        git \
        libffi-dev \
        libsnappy-dev \
        libssl-dev \
        python3-dev \
        rustc \
        zip \
    && rm -rf /var/lib/apt/lists/*
# Install the Python dependencies before the package source is copied in, so
# that source-only changes do not force the dependencies to be reinstalled.
COPY requirements.txt /ukwa_manage/requirements.txt
# NOTE(review): the VCS URL below contains the literal text `[email protected]`,
# which looks like an e-mail-obfuscation artifact rather than a real
# `<repo>@<ref>` specifier — restore the intended repository and ref before
# building.
# `cd` is used rather than WORKDIR so the working directory inherited from the
# base image is left unchanged for the running container.
# Every pip call passes --no-cache-dir so no download cache is baked into the
# layer.
RUN cd /ukwa_manage && \
    pip install --no-cache-dir -U setuptools pip wheel && \
    pip install --no-cache-dir git+https://github.com/ukwa/[email protected] && \
    echo Installing requirements.txt && \
    pip install --no-cache-dir -r requirements.txt
# Diagnostic only: print the installed package set into the build log.
RUN pip freeze
# Copy in the packaging metadata and the library sources, then install the
# package itself. The three metadata files share one COPY (one layer); `lib`
# is a directory and needs its own COPY to keep its name at the destination.
COPY setup.py README.md MANIFEST.in /ukwa_manage/
COPY lib /ukwa_manage/lib
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN cd /ukwa_manage && pip install --no-cache-dir .
# Shell helper scripts are placed in /usr/local/bin; both mrjob configuration
# files are placed in /etc under their original names.
COPY scripts/* /usr/local/bin/
COPY mrjob.conf mrjob_h3.conf /etc/
# Bring the JAR files across from the dep-env stage into /usr/local/bin.
COPY --from=dep-env /jars/* /usr/local/bin/
# ukwa/docker-hadoop's default entrypoint is entrypoint-h3.sh, so the H3 mrjob
# configuration is selected by default.
ENV MRJOB_CONF="/etc/mrjob_h3.conf"
# Add the BL-ENT-CA: copy every *.crt file from the build context into
# /usr/local/share/ca-certificates/, then run update-ca-certificates so the
# system certificate store is regenerated with them included.
COPY *.crt /usr/local/share/ca-certificates/
RUN update-ca-certificates
# The root-only build steps are finished: switch back to the `access` user so
# the code in the container does not run as root.
# NOTE(review): `access` is presumably created by the base image — confirm.
USER access