From 78106db9596161ddd0a1277aff7ce3c42da26380 Mon Sep 17 00:00:00 2001 From: Wonbin Jin <116508975+wbjin@users.noreply.github.com> Date: Tue, 10 Sep 2024 06:37:19 +0900 Subject: [PATCH] Docs: CPU energy measurement with RAPL (#118) Co-authored-by: Jae-Won Chung --- docs/getting_started/index.md | 82 +++++++++++++++++++++++++++-------- docs/measure/index.md | 38 ++++++++++++++++ zeus/monitor/energy.py | 4 +- 3 files changed, 105 insertions(+), 19 deletions(-) diff --git a/docs/getting_started/index.md b/docs/getting_started/index.md index a4e84bbc..45da1a10 100644 --- a/docs/getting_started/index.md +++ b/docs/getting_started/index.md @@ -47,20 +47,21 @@ The default command would be: ``` { .sh .annotate } docker run -it \ - --gpus all \ # (1)! - --cap-add SYS_ADMIN \ # (2)! - --ipc host \ # (3)! + --gpus all \ # (1)! + --cap-add SYS_ADMIN \ # (2)! + --ipc host \ # (3)! + -v /sys/class/powercap/intel-rapl:/zeus_sys/class/powercap/intel-rapl \ # (4)! mlenergy/zeus:latest \ bash ``` -1. Mounts all GPUs into the Docker container. -2. `SYS_ADMIN` capability is needed to change the GPU's power limit or frequency. See [here](#system-privileges). +1. Mounts all GPUs into the Docker container. See [Docker docs](https://docs.docker.com/engine/containers/resource_constraints/#expose-gpus-for-use) for more about the `--gpus` argument. +2. The `SYS_ADMIN` Linux security capability is needed to change the GPU's power limit or frequency. See [here](#system-privileges) for details and alternatives. 3. PyTorch DataLoader workers need enough shared memory for IPC. Without this, they may run out of shared memory and die. +4. Zeus reads Intel RAPL metrics for CPU/DRAM energy measurement through the `sysfs` interface. Docker disables this by default, so we need to mount it into the container separately (under `/zeus_sys`). -!!! Tip "Overriding Zeus installation" - Inside the container, `zeus`'s installation is editable (`pip install -e`). 
- So, you can mount your locally modified Zeus repository into the right path in the container (`-v /path/to/zeus:/workspace/zeus`), and your modifications will automatically be applied without you having to run `pip install` again. +Especially, `--cap-add SYS_ADMIN` is to be able to change the GPU's power limit or frequency, and `-v /sys/class/powercap/intel-rapl:/zeus_sys/class/powercap/intel-rapl` is to be able to measure CPU/DRAM energy via Intel RAPL. +See [System privileges](#system-privileges) for details. ### Pulling from Docker Hub @@ -81,20 +82,64 @@ cd zeus docker build -t mlenergy/zeus:master --build-arg TARGETARCH=amd64 -f docker/Dockerfile . ``` -## System privileges +## Verifying installation + +After installing the Zeus package, you can run the following to see whether packages and hardware are properly detected by Zeus. + +```console +$ python -m zeus.show_env +================================================================================ + +Python version: 3.9.19 + +================================================================================ + +[2024-09-09 16:40:14,495] [zeus.utils.framework](framework.py:25) PyTorch with CUDA support is available. +[2024-09-09 16:40:14,496] [zeus.utils.framework](framework.py:45) JAX is not available + +Package availability and versions: + Zeus: 0.10.0 + PyTorch: 2.4.1+cu121 + JAX: not available + +================================================================================ + +[2024-09-09 16:40:14,512] [zeus.device.gpu.nvidia](nvidia.py:46) pynvml is available and initialized. -!!! Important "Nevermind if you're just measuring GPU energy" - No special system-level privileges are needed if you are just measuring GPU time and energy. - However, when you're looking into optimizing energy and if that method requires changing the GPU's power limit or SM frequency, special system-level privileges are required. 
+GPU availability: + GPU 0: NVIDIA A40 + +================================================================================ + +[2024-09-09 16:40:14,519] [zeus.device.cpu.rapl](rapl.py:136) RAPL is available. +[2024-09-09 16:40:14,519] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:0/energy_uj +[2024-09-09 16:40:14,528] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:0/intel-rapl:0:0/energy_uj +[2024-09-09 16:40:14,533] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:1/energy_uj +[2024-09-09 16:40:14,535] [RaplWraparoundTracker](rapl.py:82) Monitoring wrap around of /sys/class/powercap/intel-rapl/intel-rapl:1/intel-rapl:1:0/energy_uj + +CPU availability: + CPU 0: + CPU measurements available (/sys/class/powercap/intel-rapl/intel-rapl:0) + DRAM measurements available (/sys/class/powercap/intel-rapl/intel-rapl:0/intel-rapl:0:0) + CPU 1: + CPU measurements available (/sys/class/powercap/intel-rapl/intel-rapl:1) + DRAM measurements available (/sys/class/powercap/intel-rapl/intel-rapl:1/intel-rapl:1:0) + +================================================================================ +``` + +## System privileges ### When are extra system privileges needed? -The Linux capability `SYS_ADMIN` is required in order to change the GPU's power limit or frequency. -Specifically, this is needed by the [`GlobalPowerLimitOptimizer`][zeus.optimizer.power_limit.GlobalPowerLimitOptimizer] and the [`PipelineFrequencyOptimizer`][zeus.optimizer.pipeline_frequency.PipelineFrequencyOptimizer]. +1. **CPU energy measurement**: `root` privileges are needed when measuring CPU energy through the Intel RAPL interface. This is due to a [security issue](https://www.cve.org/CVERecord?id=CVE-2020-8694). 
Specifically, this is needed if you want to measure CPU energy via [`ZeusMonitor`][zeus.monitor.energy.ZeusMonitor] with `cpu_indices`. +2. **GPU energy optimization**: The Linux security capability `SYS_ADMIN` (`root` is fine as well as it's stronger) is required in order to change the GPU's power limit or frequency. Specifically, this is needed by the [`GlobalPowerLimitOptimizer`][zeus.optimizer.power_limit.GlobalPowerLimitOptimizer] and the [`PipelineFrequencyOptimizer`][zeus.optimizer.pipeline_frequency.PipelineFrequencyOptimizer]. ### Option 1: Running applications in a Docker container -Using Docker, you can pass `--cap-add SYS_ADMIN` to `docker run`. +For CPU energy measurement, you are `root` inside a Docker container. You will just need to mount the RAPL sysfs directory into the Docker container. See [here](#using-docker) for instructions. + +For GPU energy optimization, you can pass `--cap-add SYS_ADMIN` to `docker run`. Since this significantly simplifies running Zeus, we recommend users to consider this option first. This is also possible for Kubernetes Pods with `securityContext.capabilities.add` in container specs ([docs](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-capabilities-for-a-container){.external}). @@ -112,17 +157,20 @@ cargo install zeusd # Run zeusd with admin privileges sudo zeusd \ - --socket-path /var/run/zeusd.sock \ # (1)! + --socket-path /var/run/zeusd.sock \ # (1)! --socket-permissions 666 # (2)! ``` 1. Unix domain socket path that `zeusd` listens to. 2. Applications need *write* access to the socket to be able to talk to `zeusd`. This string is interpreted as [UNIX file permissions](https://en.wikipedia.org/wiki/File-system_permissions#Numeric_notation). +We're currently working on adding Intel RAPL support to the Zeus daemon ([tracking issue](https://github.com/ml-energy/zeus/issues/110)). +We plan to land this feature at the end of 2024. 
+ ### Option 3: Running applications with `sudo` This is probably the worst option. -However, if none of the options above work, you can run your application with `sudo`, which automatically has `SYS_ADMIN`. +However, if none of the options above work, you can run your application with `sudo`, which is essentially `root` and automatically has `SYS_ADMIN`. ## Next Steps diff --git a/docs/measure/index.md b/docs/measure/index.md index 8f9252d7..10f02cfe 100644 --- a/docs/measure/index.md +++ b/docs/measure/index.md @@ -76,6 +76,44 @@ Depending on the Deep Learning framework you're using (currently PyTorch and JAX This is usually what you want, except when using more advanced device partitioning (e.g., using `--xla_force_host_platform_device_count` in JAX to partition CPUs into more pieces). In such cases, you probably want to opt out from using this function and handle synchronization manually at the appropriate granularity. +## CPU measurements using Intel RAPL + +[`ZeusMonitor`][zeus.monitor.ZeusMonitor] supports CPU/DRAM energy measurement as well! + +The RAPL interface for CPU energy measurement is available for the majority of Intel and AMD CPUs. +DRAM energy measurements are available on some CPUs as well. +To check support, refer to [Verifying installation](../getting_started/index.md#verifying-installation). + +To only measure the energy consumption of the CPU used by the current Python process, you can use the [`get_current_cpu_index`][zeus.device.cpu.get_current_cpu_index] function, which retrieves the CPU index where the specified process ID is running. + +You can pass in `cpu_indices=[]` or `gpu_indices=[]` to [`ZeusMonitor`][zeus.monitor.ZeusMonitor] to disable either CPU or GPU measurements. 
+ ```python hl_lines="2 5-7" +from zeus.monitor import ZeusMonitor +from zeus.device.cpu import get_current_cpu_index + +if __name__ == "__main__": + # Get the CPU index of the current process + current_cpu_index = get_current_cpu_index() + monitor = ZeusMonitor(cpu_indices=[current_cpu_index], gpu_indices=[]) + + for epoch in range(100): + monitor.begin_window("epoch") + + steps = [] + for x, y in train_loader: + monitor.begin_window("step") + train_one_step(x, y) + result = monitor.end_window("step") + steps.append(result) + + mes = monitor.end_window("epoch") + print(f"Epoch {epoch} consumed {mes.time} s and {mes.total_energy} J.") + + avg_time = sum(map(lambda m: m.time, steps)) / len(steps) + avg_energy = sum(map(lambda m: m.total_energy, steps)) / len(steps) + print(f"One step takes {avg_time} s and {avg_energy} J for the CPU.") +``` ## CLI power and energy monitor diff --git a/zeus/monitor/energy.py b/zeus/monitor/energy.py index 846d26b3..d4037f73 100644 --- a/zeus/monitor/energy.py +++ b/zeus/monitor/energy.py @@ -271,7 +271,7 @@ def begin_window(self, key: str, sync_execution: bool = True) -> None: raise ValueError(f"Measurement window '{key}' already exists") # Synchronize execution (e.g., cudaSynchronize) to freeze at the right time. - if sync_execution: + if sync_execution and self.gpu_indices: sync_execution_fn(self.gpu_indices, sync_with=self.sync_with) # Freeze the start time of the profiling window. @@ -337,7 +337,7 @@ def end_window( ) # Synchronize execution (e.g., cudaSynchronize) to freeze at the right time. - if sync_execution: + if sync_execution and self.gpu_indices: sync_execution_fn(self.gpu_indices, sync_with=self.sync_with) # If the measurement window is cancelled, return an empty Measurement object.