From 934f1a464a9c3a45631b1cb35d26a9bca1562205 Mon Sep 17 00:00:00 2001 From: Matt Robinson Date: Thu, 16 May 2024 20:22:10 -0400 Subject: [PATCH] fix: disable arm build for chainguard (#3039) ### Summary Temporarily disables the ARM build due to the error in [this CI job](https://github.com/Unstructured-IO/unstructured/actions/runs/9114507405/job/25058629166). Will add back support for ARM using the rockylinux container once we show this works. --- .github/workflows/docker-publish.yml | 32 +++++++++++++++------------- Dockerfile | 17 +++------------ scripts/docker-smoke-test.sh | 7 +++--- 3 files changed, 24 insertions(+), 32 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 40ac4f4fe2..5cdda5724c 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -24,7 +24,10 @@ jobs: build-images: strategy: matrix: - docker-platform: ["linux/arm64", "linux/amd64"] + # NOTE(robinson) - temporarily disabling arm since the libreoffice packages only + # work on amd right now + docker-platform: ["linux/amd64"] + # docker-platform: ["linux/arm64", "linux/amd64"] runs-on: ubuntu-latest-m needs: set-short-sha env: @@ -55,11 +58,6 @@ jobs: --progress plain \ --cache-from $DOCKER_BUILD_REPOSITORY:$ARCH \ -t $DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA . 
- - name: Scan image - uses: anchore/scan-action@v3 - with: - image: "$DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA" - severity-cutoff: high - name: Set up QEMU uses: docker/setup-qemu-action@v2 - name: Test images @@ -74,7 +72,8 @@ jobs: DOCKER_PLATFORM="${{ matrix.docker-platform }}" DOCKER_IMAGE="$DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA" \ make docker-test CI=true TEST_FILE=test_unstructured/partition/test_text.py fi - DOCKER_IMAGE=$DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA make docker-smoke-test + # NOTE(robinson) - disabling the smoke test because there's no notebook user anymore + # DOCKER_IMAGE=$DOCKER_BUILD_REPOSITORY:$ARCH-$SHORT_SHA make docker-smoke-test - name: Push images run: | # write to the build repository to cache for the publish-images job @@ -98,22 +97,25 @@ jobs: - name: Pull AMD image run: | docker pull $DOCKER_BUILD_REPOSITORY:amd64-$SHORT_SHA - - name: Pull ARM image - run: | - docker pull $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA + # NOTE(robinson) - put this back in when we reenable ARM + # - name: Pull ARM image + # run: | + # docker pull $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA - name: Push latest build tags for AMD and ARM run: | # these are used to construct the final manifest but also cache-from in subsequent runs docker tag $DOCKER_BUILD_REPOSITORY:amd64-$SHORT_SHA $DOCKER_BUILD_REPOSITORY:amd64 docker push $DOCKER_BUILD_REPOSITORY:amd64 - docker tag $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA $DOCKER_BUILD_REPOSITORY:arm64 - docker push $DOCKER_BUILD_REPOSITORY:arm64 + # NOTE(robinson) - update this when we reenable ARM + # docker tag $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA $DOCKER_BUILD_REPOSITORY:arm64 + # docker push $DOCKER_BUILD_REPOSITORY:arm64 - name: Push multiarch manifest run: | - docker manifest create ${DOCKER_REPOSITORY}:latest $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64 + # NOTE(robinson) - update this when we reenable ARM + docker manifest create ${DOCKER_REPOSITORY}:latest $DOCKER_BUILD_REPOSITORY:amd64 docker 
manifest push $DOCKER_REPOSITORY:latest - docker manifest create ${DOCKER_REPOSITORY}:$SHORT_SHA $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64 + docker manifest create ${DOCKER_REPOSITORY}:$SHORT_SHA $DOCKER_BUILD_REPOSITORY:amd64 docker manifest push $DOCKER_REPOSITORY:$SHORT_SHA VERSION=$(grep -Po '(?<=__version__ = ")[^"]*' unstructured/__version__.py) - docker manifest create ${DOCKER_REPOSITORY}:$VERSION $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64 + docker manifest create ${DOCKER_REPOSITORY}:$VERSION $DOCKER_BUILD_REPOSITORY:amd64 docker manifest push $DOCKER_REPOSITORY:$VERSION diff --git a/Dockerfile b/Dockerfile index 4647c79dd0..f2fc3c675c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,7 @@ WORKDIR /app USER root COPY ./docker-packages/*.apk packages/ -COPY ./requirements/*.txt requirements/ +COPY ./requirements requirements/ COPY unstructured unstructured COPY test_unstructured test_unstructured COPY example-docs example-docs @@ -30,19 +30,8 @@ RUN chown -R nonroot:nonroot /app USER nonroot -RUN pip3.11 install --no-cache-dir --user -r requirements/base.txt && \ - pip3.11 install --no-cache-dir --user -r requirements/test.txt && \ - pip3.11 install --no-cache-dir --user -r requirements/extra-csv.txt && \ - pip3.11 install --no-cache-dir --user -r requirements/extra-docx.txt && \ - pip3.11 install --no-cache-dir --user -r requirements/extra-epub.txt && \ - pip3.11 install --no-cache-dir --user -r requirements/extra-markdown.txt && \ - pip3.11 install --no-cache-dir --user -r requirements/extra-msg.txt && \ - pip3.11 install --no-cache-dir --user -r requirements/extra-odt.txt && \ - pip3.11 install --no-cache-dir --user -r requirements/extra-pdf-image.txt && \ - pip3.11 install --no-cache-dir --user -r requirements/extra-pptx.txt && \ - pip3.11 install --no-cache-dir --user -r requirements/extra-xlsx.txt && \ - pip3.11 install --no-cache-dir --user -r requirements/huggingface.txt && \ - pip3.11 install 
unstructured.paddlepaddle +RUN find requirements/ -type f -name "*.txt" -exec pip3.11 install --no-cache-dir --user -r '{}' ';' +RUN pip3.11 install unstructured.paddlepaddle RUN python3.11 -c "import nltk; nltk.download('punkt')" && \ python3.11 -c "import nltk; nltk.download('averaged_perceptron_tagger')" && \ diff --git a/scripts/docker-smoke-test.sh b/scripts/docker-smoke-test.sh index b040b13a52..6cace034bb 100755 --- a/scripts/docker-smoke-test.sh +++ b/scripts/docker-smoke-test.sh @@ -38,9 +38,10 @@ trap stop_container EXIT await_container # Run the tests -docker cp test_unstructured_ingest $CONTAINER_NAME:/home/notebook-user -docker exec -u root "$CONTAINER_NAME" /bin/bash -c "chown -R 1000:1000 /home/notebook-user/test_unstructured_ingest" -docker exec "$CONTAINER_NAME" /bin/bash -c "/home/notebook-user/test_unstructured_ingest/src/wikipedia.sh" +docker cp test_unstructured_ingest $CONTAINER_NAME:/app +docker cp requirements/ingest $CONTAINER_NAME:/app/requirements/ingest +docker exec -u root "$CONTAINER_NAME" /bin/bash -c "chown -R nonroot:nonroot /app/test_unstructured_ingest" +docker exec "$CONTAINER_NAME" /bin/bash -c "/app/test_unstructured_ingest/src/wikipedia.sh" result=$? exit $result