Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: reenable arm64 builds for docker #3045

Merged
merged 9 commits into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 11 additions & 17 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,7 @@ jobs:
build-images:
strategy:
matrix:
# NOTE(robinson) - temporarily disabling arm since the libreoffice packages only
# works on amd right now
docker-platform: ["linux/amd64"]
# docker-platform: ["linux/arm64", "linux/amd64"]
docker-platform: ["linux/arm64", "linux/amd64"]
runs-on: ubuntu-latest-m
needs: set-short-sha
env:
Expand All @@ -53,6 +50,7 @@ jobs:
make docker-dl-packages
ARCH=$(cut -d "/" -f2 <<< ${{ matrix.docker-platform }})
DOCKER_BUILDKIT=1 docker buildx build --platform=$ARCH --load \
-f Dockerfile-$ARCH \
--build-arg PIP_VERSION=$PIP_VERSION \
--build-arg BUILDKIT_INLINE_CACHE=1 \
--progress plain \
Expand All @@ -72,8 +70,7 @@ jobs:
DOCKER_PLATFORM="${{ matrix.docker-platform }}" DOCKER_IMAGE="$DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA" \
make docker-test CI=true TEST_FILE=test_unstructured/partition/test_text.py
fi
# NOTE(robinson) - disabling smoke because there's no notebook user anymore
# DOCKER_IMAGE=$DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA make docker-smoke-test
DOCKER_IMAGE=$DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA make docker-smoke-test
- name: Push images
run: |
# write to the build repository to cache for the publish-images job
Expand All @@ -97,25 +94,22 @@ jobs:
- name: Pull AMD image
run: |
docker pull $DOCKER_BUILD_REPOSITORY:amd64-$SHORT_SHA
# NOTE(robinson) - put this back in when we reenable ARM
# - name: Pull ARM image
# run: |
# docker pull $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA
- name: Pull ARM image
run: |
docker pull $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA
- name: Push latest build tags for AMD and ARM
run: |
# these are used to construct the final manifest but also cache-from in subsequent runs
docker tag $DOCKER_BUILD_REPOSITORY:amd64-$SHORT_SHA $DOCKER_BUILD_REPOSITORY:amd64
docker push $DOCKER_BUILD_REPOSITORY:amd64
# NOTE(robinson) - update this when we reenable ARM
# docker tag $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA $DOCKER_BUILD_REPOSITORY:arm64
# docker push $DOCKER_BUILD_REPOSITORY:arm64
docker tag $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA $DOCKER_BUILD_REPOSITORY:arm64
docker push $DOCKER_BUILD_REPOSITORY:arm64
- name: Push multiarch manifest
run: |
# NOTE(robinson) - update this when we reenable ARM
docker manifest create ${DOCKER_REPOSITORY}:latest $DOCKER_BUILD_REPOSITORY:amd64
docker manifest create ${DOCKER_REPOSITORY}:latest $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64
docker manifest push $DOCKER_REPOSITORY:latest
docker manifest create ${DOCKER_REPOSITORY}:$SHORT_SHA $DOCKER_BUILD_REPOSITORY:amd64
docker manifest create ${DOCKER_REPOSITORY}:$SHORT_SHA $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64
docker manifest push $DOCKER_REPOSITORY:$SHORT_SHA
VERSION=$(grep -Po '(?<=__version__ = ")[^"]*' unstructured/__version__.py)
docker manifest create ${DOCKER_REPOSITORY}:$VERSION $DOCKER_BUILD_REPOSITORY:amd64
docker manifest create ${DOCKER_REPOSITORY}:$VERSION $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64
docker manifest push $DOCKER_REPOSITORY:$VERSION
File renamed without changes.
41 changes: 41 additions & 0 deletions Dockerfile-arm64
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# syntax=docker/dockerfile:experimental
FROM quay.io/unstructured-io/base-images:rocky9.2-9@sha256:73d8492452f086144d4b92b7931aa04719f085c74d16cae81e8826ef873729c9 as base

# NOTE(crag): NB_USER ARG for mybinder.org compat:
# https://mybinder.readthedocs.io/en/latest/tutorials/dockerfile.html
ARG NB_USER=notebook-user
ARG NB_UID=1000
ARG PIP_VERSION

# Set up environment
ENV HOME /home/${NB_USER}
ENV PYTHONPATH="${PYTHONPATH}:${HOME}"
ENV PATH="/home/usr/.local/bin:${PATH}"

RUN groupadd --gid ${NB_UID} ${NB_USER}
RUN useradd --uid ${NB_UID} --gid ${NB_UID} ${NB_USER}
WORKDIR ${HOME}

FROM base as deps
# Copy and install Unstructured
COPY requirements requirements

RUN python3.10 -m pip install pip==${PIP_VERSION} && \
dnf -y groupinstall "Development Tools" && \
find requirements/ -type f -name "*.txt" -exec python3 -m pip install --no-cache -r '{}' ';' && \
dnf -y groupremove "Development Tools" && \
dnf clean all

RUN python3.10 -c "import nltk; nltk.download('punkt')" && \
python3.10 -c "import nltk; nltk.download('averaged_perceptron_tagger')"

FROM deps as code

USER ${NB_USER}

COPY example-docs example-docs
COPY unstructured unstructured

RUN python3.10 -c "from unstructured.partition.model_init import initialize; initialize()"

CMD ["/bin/bash"]
2 changes: 1 addition & 1 deletion scripts/docker-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ DOCKER_REPOSITORY="${DOCKER_REPOSITORY:-quay.io/unstructured-io/unstructured}"
PIP_VERSION="${PIP_VERSION:-23.1.2}"
DOCKER_IMAGE="${DOCKER_IMAGE:-unstructured:dev}"

DOCKER_BUILD_CMD=(docker buildx build --load -f Dockerfile
DOCKER_BUILD_CMD=(docker buildx build --load -f Dockerfile-amd64
--build-arg PIP_VERSION="$PIP_VERSION"
--build-arg BUILDKIT_INLINE_CACHE=1
--progress plain
Expand Down
14 changes: 10 additions & 4 deletions scripts/docker-smoke-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,16 @@ trap stop_container EXIT
await_container

# Run the tests
docker cp test_unstructured_ingest $CONTAINER_NAME:/app
docker cp requirements/ingest $CONTAINER_NAME:/app/requirements/ingest
docker exec -u root "$CONTAINER_NAME" /bin/bash -c "chown -R nonroot:nonroot /app/test_unstructured_ingest"
docker exec "$CONTAINER_NAME" /bin/bash -c "/app/test_unstructured_ingest/src/wikipedia.sh"
if [[ "$DOCKER_IMAGE" == *"arm64"* ]]; then
docker cp test_unstructured_ingest $CONTAINER_NAME:/home/notebook-user
docker exec -u root "$CONTAINER_NAME" /bin/bash -c "chown -R 1000:1000 /home/notebook-user/test_unstructured_ingest"
docker exec "$CONTAINER_NAME" /bin/bash -c "/home/notebook-user/test_unstructured_ingest/src/wikipedia.sh"
else
docker cp test_unstructured_ingest $CONTAINER_NAME:/app
docker cp requirements/ingest $CONTAINER_NAME:/app/requirements/ingest
docker exec -u root "$CONTAINER_NAME" /bin/bash -c "chown -R nonroot:nonroot /app/test_unstructured_ingest"
docker exec "$CONTAINER_NAME" /bin/bash -c "/app/test_unstructured_ingest/src/wikipedia.sh"
fi

result=$?
exit $result
Loading