Skip to content

Commit

Permalink
Disable docker sandbox (#4649)
Browse files Browse the repository at this point in the history
* Revert "Disable the Sandbox build in Docker (#4515)"

This reverts commit 661ab6d.

* Remove submodule step from TPU CI

* Update cloudbuild.yaml

* Remove separate TF build step

* Add bazel WORKSPACE and rc

* Update Dockerfile

* Throw more compute at the TPU CI build

* Use more Bazel jobs

* Revert to old machine type and default to local builds.

* Revert to default nonsandbox

* Remove jobs limitation

* Try smaller shm

* Fix build of llvm in sandbox

* Mount 7G of RAM properly

* TEMP remove pytorch build

* Remove package installation

* Confirm shm creation

* Revert machine type

---------

Co-authored-by: Will Cromar <[email protected]>
Co-authored-by: Will Cromar <[email protected]>
  • Loading branch information
3 people authored Feb 17, 2023
1 parent b862368 commit 3f091bd
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 30 deletions.
2 changes: 0 additions & 2 deletions .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ build --announce_rc
# TODO(goranpetrovic): figure out visibility of tensorflow libraries.
build --nocheck_visibility

#build --define open_source_build=true

# We can set this to `standalone` after https://github.com/bazelbuild/bazel/issues/15359 is resolved.
build --spawn_strategy=sandboxed

Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ ENV BUNDLE_LIBTPU "${tpuvm}"
ENV BAZEL_JOBS "${bazel_jobs}"

# This makes the bazel build behave more consistently, but runs slower.
ENV XLA_SANDBOX_BUILD "0"
ENV XLA_SANDBOX_BUILD "1"
ENV XLA_SANDBOX_BASE "/dev/shm"

# To get around issue of Cloud Build with recursive submodule update
Expand Down
19 changes: 10 additions & 9 deletions docker/experimental/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -70,28 +70,29 @@ WORKDIR /pytorch/xla/
FROM builder AS artifacts

COPY third_party/ third_party/

COPY tf_patches/ tf_patches/
COPY .bazelrc .
COPY .bazelversion .
COPY WORKSPACE .
COPY build_torch_xla_libs.sh .

# TODO: Remove this when it's not required anymore
ENV XLA_SANDBOX_BUILD=0
ENV XLA_SANDBOX_BUILD=1
ENV XLA_SANDBOX_BASE "/dev/shm"

ARG tpuvm
ARG cuda
ARG tf_cuda_compute_capabilities
ARG bazel_jobs
RUN TPUVM_MODE=${tpuvm} XLA_CUDA=${cuda} BAZEL_JOBS=${bazel_jobs} TF_CUDA_COMPUTE_CAPABILITIES=${tf_cuda_compute_capabilities} bash build_torch_xla_libs.sh -O -D_GLIBCXX_USE_CXX11_ABI=1

COPY torch_xla/ torch_xla/
COPY setup.py .
COPY xla_native_functions.yaml .

COPY scripts/ scripts/

ARG tpuvm
ARG cuda
ARG tf_cuda_compute_capabilities
ARG bazel_jobs
ARG build_cpp_tests
ARG package_version
RUN TORCH_XLA_VERSION=${package_version} BUILD_CPP_TESTS=${build_cpp_tests} TPUVM_MODE=${tpuvm} BUNDLE_LIBTPU=${tpuvm} XLA_CUDA=${cuda} TF_CUDA_COMPUTE_CAPABILITIES=${tf_cuda_compute_capabilities} python setup.py bdist_wheel
RUN --mount=type=tmpfs,target=/dev/shm,rw TORCH_XLA_VERSION=${package_version} BUILD_CPP_TESTS=${build_cpp_tests} TPUVM_MODE=${tpuvm} BUNDLE_LIBTPU=${tpuvm} XLA_CUDA=${cuda} TF_CUDA_COMPUTE_CAPABILITIES=${tf_cuda_compute_capabilities} python setup.py bdist_wheel

# Expunge cache to keep image size under control
RUN bazel clean --expunge
Expand Down
8 changes: 0 additions & 8 deletions docker/experimental/cloudbuild.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,4 @@
steps:
# We only need to update submodules in triggers. User must update submodule
# before local runs because .git is not present.
- name: 'alpine/git'
entrypoint: sh
args:
- -c
- |
git submodule update --init || echo No git repository found
- id: 'common-flags'
name: 'bash'
args:
Expand Down
10 changes: 0 additions & 10 deletions test/tpu/cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,6 @@ substitutions:
# Location of GKE cluster.
_CLUSTER_ZONE: 'europe-west4-a'
steps:
# We only need to update submodules in triggers. User must update submodule
# before local runs because .git is not present.
- name: 'alpine/git'
entrypoint: sh
args:
- -c
- |
git submodule update --init || echo No git repository found
- name: 'docker'
id: build-image
args: [
Expand All @@ -26,8 +18,6 @@ steps:
'--shm-size=16G',
'--build-arg',
'tpuvm=1',
'--build-arg',
'bazel_jobs=8'
]
- name: 'docker'
id: push-image
Expand Down

0 comments on commit 3f091bd

Please sign in to comment.