Skip to content

Better element IDs - deterministic and document-unique hashes (#2673) #1172

Better element IDs - deterministic and document-unique hashes (#2673)

Better element IDs - deterministic and document-unique hashes (#2673) #1172

name: Build And Push Docker Image
on:
push:
branches:
- main
env:
DOCKER_REPOSITORY: quay.io/unstructured-io/unstructured
DOCKER_BUILD_REPOSITORY: quay.io/unstructured-io/build-unstructured
PIP_VERSION: "23.2.1"
PYTHON_VERSION: "3.10"
jobs:
set-short-sha:
runs-on: ubuntu-latest
outputs:
short_sha: ${{ steps.set_short_sha.outputs.short_sha }}
steps:
- name: Set Short SHA
id: set_short_sha
run: echo "short_sha=$(echo ${{ github.sha }} | cut -c1-7)" >> $GITHUB_OUTPUT
build-images:
strategy:
matrix:
docker-platform: ["linux/arm64", "linux/amd64"]
runs-on: ubuntu-latest-m
needs: set-short-sha
env:
SHORT_SHA: ${{ needs.set-short-sha.outputs.short_sha }}
steps:
- name: Set up Docker
# Use the `docker` driver for AMD builds because the `docker-container` driver may fail to locally load the built image.
# This could be due to the larger size of the AMD build and the `docker-container` driver needing to load the tarball.
# Use the `docker-container` driver for ARM builds because it may otherwise intermittently fail with: `exec /bin/sh: exec format error`
uses: docker/setup-buildx-action@v1
with:
driver: ${{ matrix.docker-platform == 'linux/amd64' && 'docker' || 'docker-container' }}
- name: Checkout code
uses: actions/checkout@v3
- name: Login to Quay.io
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.QUAY_IO_ROBOT_USERNAME }}
password: ${{ secrets.QUAY_IO_ROBOT_TOKEN }}
- name: Build images
run: |
ARCH=$(cut -d "/" -f2 <<< ${{ matrix.docker-platform }})
DOCKER_BUILDKIT=1 docker buildx build --platform=$ARCH --load \
--build-arg PIP_VERSION=$PIP_VERSION \
--build-arg BUILDKIT_INLINE_CACHE=1 \
--progress plain \
--cache-from $DOCKER_BUILD_REPOSITORY:$ARCH \
-t $DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA .
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Test images
run: |
echo "UNS_API_KEY=${{ secrets.UNS_API_KEY }}" > uns_test_env_file
echo "UNSTRUCTURED_HF_TOKEN=${{ secrets.HF_TOKEN }}" >> uns_test_env_file
ARCH=$(cut -d "/" -f2 <<< ${{ matrix.docker-platform }})
if [ "$ARCH" = "amd64" ]; then
DOCKER_PLATFORM="${{ matrix.docker-platform }}" DOCKER_IMAGE="$DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA" \
make docker-test CI=true
else
DOCKER_PLATFORM="${{ matrix.docker-platform }}" DOCKER_IMAGE="$DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA" \
make docker-test CI=true TEST_FILE=test_unstructured/partition/test_text.py
fi
DOCKER_IMAGE=$DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA make docker-smoke-test
- name: Push images
run: |
# write to the build repository to cache for the publish-images job
ARCH=$(cut -d "/" -f2 <<< ${{ matrix.docker-platform }})
docker push "$DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA"
publish-images:
runs-on: ubuntu-latest-m
needs: [set-short-sha, build-images]
env:
SHORT_SHA: ${{ needs.set-short-sha.outputs.short_sha }}
steps:
- uses: docker/setup-buildx-action@v1
- name: Checkout code
uses: actions/checkout@v3
- name: Login to Quay.io
uses: docker/login-action@v1
with:
registry: quay.io
username: ${{ secrets.QUAY_IO_ROBOT_USERNAME }}
password: ${{ secrets.QUAY_IO_ROBOT_TOKEN }}
- name: Pull AMD image
run: |
docker pull $DOCKER_BUILD_REPOSITORY:amd64-$SHORT_SHA
- name: Pull ARM image
run: |
docker pull $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA
- name: Push latest build tags for AMD and ARM
run: |
# these are used to construct the final manifest but also cache-from in subsequent runs
docker tag $DOCKER_BUILD_REPOSITORY:amd64-$SHORT_SHA $DOCKER_BUILD_REPOSITORY:amd64
docker push $DOCKER_BUILD_REPOSITORY:amd64
docker tag $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA $DOCKER_BUILD_REPOSITORY:arm64
docker push $DOCKER_BUILD_REPOSITORY:arm64
- name: Push multiarch manifest
run: |
docker manifest create ${DOCKER_REPOSITORY}:latest $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64
docker manifest push $DOCKER_REPOSITORY:latest
docker manifest create ${DOCKER_REPOSITORY}:$SHORT_SHA $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64
docker manifest push $DOCKER_REPOSITORY:$SHORT_SHA
VERSION=$(grep -Po '(?<=__version__ = ")[^"]*' unstructured/__version__.py)
docker manifest create ${DOCKER_REPOSITORY}:$VERSION $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64
docker manifest push $DOCKER_REPOSITORY:$VERSION