From adbdf16067c620aaadc50e9f478d149bd38e6ee0 Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Tue, 5 Nov 2024 10:48:30 +0100 Subject: [PATCH 1/3] Add support for bytehound memory profiling in MacOS --- .gitignore | 1 + Cargo.toml | 5 ++++ profile/Dockerfile.profile | 47 ++++++++++++++++++++++++++++++++++++++ profile/justfile | 19 +++++++++++++++ seafowl.toml | 2 +- 5 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 profile/Dockerfile.profile create mode 100644 profile/justfile diff --git a/.gitignore b/.gitignore index 773173bc..5d4a17f6 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ # Memory profiler output memory-profiling_* profile-* +profile/memory-profiling_* # CLI history .history \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 35ed2b96..2bb728df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -163,3 +163,8 @@ vergen-gitcl = { version = "1", features = ["build", "cargo", "rustc"] } codegen-units = 1 lto = true strip = true + +[profile.release-with-debug] +debug = true +inherits = "release" +strip = false diff --git a/profile/Dockerfile.profile b/profile/Dockerfile.profile new file mode 100644 index 00000000..952f0214 --- /dev/null +++ b/profile/Dockerfile.profile @@ -0,0 +1,47 @@ +# Image that wraps Seafowl with a bytehound binary and records +# memory allocations for profiling +# +# To build run just the bytehound layer run +# docker build --target bytehound -f Dockerfile.profile -t splitgraph/bytehound . +# +# To build the full image run +# docker build -f Dockerfile.profile -t splitgraph/seafowl:profile .. 
+ +FROM rust:slim AS bytehound + +RUN apt-get update && \ + apt-get install -y git protobuf-compiler ca-certificates npm && \ + npm install -g yarn + +# Fetch bytehound source and compile +RUN git clone https://github.com/koute/bytehound.git && \ + cd bytehound && \ + cargo build --release -p bytehound-preload && \ + cargo build --release -p bytehound-cli + +FROM bytehound AS profile + +RUN rustup default nightly-2024-10-30 + +# Compile an empty project, so as to cache the compiled deps and avoid unneeded re-compilation. +# Adapted from https://gist.github.com/noelbundick/6922d26667616e2ba5c3aff59f0824cd +RUN cargo new seafowl +WORKDIR seafowl +COPY Cargo.toml Cargo.lock build.rs . +COPY clade clade +COPY datafusion_remote_tables datafusion_remote_tables +COPY migrations migrations +COPY object_store_factory object_store_factory +RUN --mount=type=cache,target=/usr/local/cargo/registry cargo build --profile release-with-debug + +# Copy the rest of the code now and update timestamps to force a new build only for it +COPY src src +RUN --mount=type=cache,target=/usr/local/cargo/registry set -e && \ + touch src/lib.rs src/main.rs && \ + cargo build --profile release-with-debug + +RUN cd .. && mkdir seafowl-data + +ENV MEMORY_PROFILER_OUTPUT=profiles/memory-profiling_%e_%t_%p.dat +ENV LD_PRELOAD=/bytehound/target/release/libbytehound.so +ENTRYPOINT [ "./target/release-with-debug/seafowl" ] diff --git a/profile/justfile b/profile/justfile new file mode 100644 index 00000000..3b8953a7 --- /dev/null +++ b/profile/justfile @@ -0,0 +1,19 @@ +build-bytehound: + docker build --target bytehound -f Dockerfile.profile -t splitgraph/bytehound . + +build-profiler: + docker build -f Dockerfile.profile -t splitgraph/seafowl:profile .. 
+ +profile: + docker run \ + -p 8080:8080 -p 47470:47470 \ + -v .:/seafowl/profiles \ + -v `realpath ../seafowl.toml`:/seafowl.toml \ + splitgraph/seafowl:profile -c /seafowl.toml + +view: + docker run -p 8888:8888 -v .:/profiles -w /profiles splitgraph/bytehound \ + /bytehound/target/release/bytehound server -i 0.0.0.0 -p 8888 memory-profiling_* + +clean: + rm -rf memory-profiling_* diff --git a/seafowl.toml b/seafowl.toml index 35846273..3468b1f2 100644 --- a/seafowl.toml +++ b/seafowl.toml @@ -15,4 +15,4 @@ bind_port = 6432 bind_host = "0.0.0.0" [frontend.flight] -bind_host = "127.0.0.1" +bind_host = "0.0.0.0" From eea08a86d89fada8acb60579215997ba5b8c3851 Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Wed, 6 Nov 2024 12:33:11 +0100 Subject: [PATCH 2/3] Compile on host and copy the release-with-debug to container during build This is so as to avoid OOM during builds on MacOS. --- profile/Dockerfile.profile | 39 ++++++++++---------------------------- profile/justfile | 16 ++++++++++------ 2 files changed, 20 insertions(+), 35 deletions(-) diff --git a/profile/Dockerfile.profile b/profile/Dockerfile.profile index 952f0214..7a8982a1 100644 --- a/profile/Dockerfile.profile +++ b/profile/Dockerfile.profile @@ -2,46 +2,27 @@ # memory allocations for profiling # # To build run just the bytehound layer run -# docker build --target bytehound -f Dockerfile.profile -t splitgraph/bytehound . +# DOCKER_BUILDKIT=1 docker build --target bytehound -f Dockerfile.profile -t splitgraph/bytehound . # # To build the full image run -# docker build -f Dockerfile.profile -t splitgraph/seafowl:profile .. +# DOCKER_BUILDKIT=1 docker build -f Dockerfile.profile -t splitgraph/seafowl:profile .. 
FROM rust:slim AS bytehound RUN apt-get update && \ apt-get install -y git protobuf-compiler ca-certificates npm && \ - npm install -g yarn - -# Fetch bytehound source and compile -RUN git clone https://github.com/koute/bytehound.git && \ + npm install -g yarn && \ + git clone https://github.com/koute/bytehound.git && \ cd bytehound && \ cargo build --release -p bytehound-preload && \ cargo build --release -p bytehound-cli -FROM bytehound AS profile - -RUN rustup default nightly-2024-10-30 - -# Compile an empty project, so as to cache the compiled deps and avoid unneeded re-compilation. -# Adapted from https://gist.github.com/noelbundick/6922d26667616e2ba5c3aff59f0824cd -RUN cargo new seafowl -WORKDIR seafowl -COPY Cargo.toml Cargo.lock build.rs . -COPY clade clade -COPY datafusion_remote_tables datafusion_remote_tables -COPY migrations migrations -COPY object_store_factory object_store_factory -RUN --mount=type=cache,target=/usr/local/cargo/registry cargo build --profile release-with-debug - -# Copy the rest of the code now and update timestamps to force a new build only for it -COPY src src -RUN --mount=type=cache,target=/usr/local/cargo/registry set -e && \ - touch src/lib.rs src/main.rs && \ - cargo build --profile release-with-debug +FROM ubuntu AS profile -RUN cd .. && mkdir seafowl-data +RUN mkdir profiles && mkdir seafowl-data +COPY target/aarch64-unknown-linux-gnu/release-with-debug/seafowl seafowl +COPY --from=bytehound /bytehound/target/release/libbytehound.so libbytehound.so ENV MEMORY_PROFILER_OUTPUT=profiles/memory-profiling_%e_%t_%p.dat -ENV LD_PRELOAD=/bytehound/target/release/libbytehound.so -ENTRYPOINT [ "./target/release-with-debug/seafowl" ] +ENV LD_PRELOAD=./libbytehound.so +ENTRYPOINT [ "./seafowl" ] diff --git a/profile/justfile b/profile/justfile index 3b8953a7..869d33bd 100644 --- a/profile/justfile +++ b/profile/justfile @@ -1,19 +1,23 @@ build-bytehound: - docker build --target bytehound -f Dockerfile.profile -t splitgraph/bytehound . 
+ DOCKER_BUILDKIT=1 docker build --target bytehound -f Dockerfile.profile -t splitgraph/bytehound . +# TODO: generalize to support amd64 build-profiler: - docker build -f Dockerfile.profile -t splitgraph/seafowl:profile .. + CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc + cargo build --target aarch64-unknown-linux-gnu --profile release-with-debug + DOCKER_BUILDKIT=1 docker build -f Dockerfile.profile -t splitgraph/seafowl:profile .. profile: docker run \ -p 8080:8080 -p 47470:47470 \ - -v .:/seafowl/profiles \ + -v .:/profiles \ -v `realpath ../seafowl.toml`:/seafowl.toml \ + -v `realpath ../../seafowl-data`:/seafowl-data \ splitgraph/seafowl:profile -c /seafowl.toml -view: - docker run -p 8888:8888 -v .:/profiles -w /profiles splitgraph/bytehound \ - /bytehound/target/release/bytehound server -i 0.0.0.0 -p 8888 memory-profiling_* +view *files='memory-profiling_*': + docker run -p 9999:9999 -v .:/profiles -w /profiles \splitgraph/bytehound \ + /bytehound/target/release/bytehound server -i 0.0.0.0 -p 9999 {{files}} clean: rm -rf memory-profiling_* From 656d36d80250d95d35ffe7688eac7db2662b2342 Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Wed, 6 Nov 2024 19:45:10 +0100 Subject: [PATCH 3/3] Add a README for profiling --- profile/README.md | 60 +++++++++++++++++++++++++++++++++++++++++++++++ profile/justfile | 8 +++++-- 2 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 profile/README.md diff --git a/profile/README.md b/profile/README.md new file mode 100644 index 00000000..345ba29a --- /dev/null +++ b/profile/README.md @@ -0,0 +1,60 @@ +## Setup + +If you're on MacOS you'll need cross-compilation tools, since the profiling Seafowl binary is +compiled on the host (compiling inside the container leads to OOMs). Grab one from +https://github.com/messense/homebrew-macos-cross-toolchains. 
+ +To build the bytehound and the profiler (Seafowl wrapped in bytehound) images run + +```shell +$ just build-bytehound +$ just build-profiler +``` + +## Measuring + +To actually start profiling run + +```shell +$ just profile +docker run -p 8080:8080 -p 47470:47470 -v .:/profiles -v `realpath ../seafowl.toml`:/seafowl.toml -v `realpath ../../seafowl-data`:/seafowl-data splitgraph/seafowl:profile -c /seafowl.toml +2024-11-06T14:12:11.519272Z INFO main ThreadId(01) seafowl: Starting Seafowl 0.5.8 +2024-11-06T14:12:11.519390Z INFO main ThreadId(01) seafowl: Loading the configuration from /seafowl.toml +2024-11-06T14:12:11.538033Z INFO tokio-runtime-worker ThreadId(12) seafowl: Starting the Arrow Flight frontend on 0.0.0.0:47470 +2024-11-06T14:12:11.538268Z INFO tokio-runtime-worker ThreadId(12) seafowl: Starting the PostgreSQL frontend on 127.0.0.1:6432 +2024-11-06T14:12:11.538275Z WARN tokio-runtime-worker ThreadId(12) seafowl: The PostgreSQL frontend doesn't have authentication or encryption and should only be used in development! +2024-11-06T14:12:11.538321Z INFO tokio-runtime-worker ThreadId(12) seafowl: Starting the HTTP frontend on 0.0.0.0:8080 +... +``` + +and then run your workload against the HTTP/gRPC endpoint. + +Bytehound continually dumps the allocation data into a single file for each run + +```shell +$ tree -h | grep mem +├── [3.8G] memory-profiling_seafowl_1730902150_1.dat +├── [4.3G] memory-profiling_seafowl_1730902331_1.dat +``` + +Once you're done with profiling press ctrl + c to stop the container. + +## Browsing + +To see the data in a web UI either explicitly load one or more files + +```shell +just view memory-profiling_seafowl_1730902331_1.dat +``` + +or load all recorded files + +```shell +just view +``` + +and open `localhost:9999`. + +Note that you should strive to keep the recorded profiles under 5GB since otherwise the server will +fail to load them. 
If you must profile a long-running process but want to work around this limit, consider +toggling the recording on and off via `docker kill -s SIGUSR1 seafowl-profiler`. diff --git a/profile/justfile b/profile/justfile index 869d33bd..11caafce 100644 --- a/profile/justfile +++ b/profile/justfile @@ -8,7 +8,7 @@ build-profiler: DOCKER_BUILDKIT=1 docker build -f Dockerfile.profile -t splitgraph/seafowl:profile .. profile: - docker run \ + docker run --rm --name seafowl-profiler \ -p 8080:8080 -p 47470:47470 \ -v .:/profiles \ -v `realpath ../seafowl.toml`:/seafowl.toml \ @@ -16,7 +16,11 @@ profile: splitgraph/seafowl:profile -c /seafowl.toml view *files='memory-profiling_*': - docker run -p 9999:9999 -v .:/profiles -w /profiles \splitgraph/bytehound \ + docker run --rm --name seafowl-profile-server \ + -p 9999:9999 \ + -v .:/profiles \ + -w /profiles \ + \splitgraph/bytehound \ /bytehound/target/release/bytehound server -i 0.0.0.0 -p 9999 {{files}} clean: