diff --git a/.clang-format b/.clang-format index 2a3dc26d..e593f29e 100644 --- a/.clang-format +++ b/.clang-format @@ -36,9 +36,21 @@ BreakBeforeBinaryOperators: None BreakBeforeTernaryOperators: true BreakConstructorInitializers: BeforeComma BreakInheritanceList: BeforeComma -ColumnLimit: 80 +ColumnLimit: 100 CompactNamespaces: false ContinuationIndentWidth: 2 +IncludeBlocks: Regroup +IncludeCategories: + - Regex: '^$' + Priority: 5 IndentCaseLabels: true IndentPPDirectives: None IndentWidth: 2 @@ -55,7 +67,7 @@ PenaltyExcessCharacter: 100 PenaltyReturnTypeOnItsOwnLine: 90 PointerAlignment: Right ReflowComments: true -SortIncludes: true +SortIncludes: CaseInsensitive SpaceAfterCStyleCast: false SpaceAfterLogicalNot: false SpaceAfterTemplateKeyword: true diff --git a/.clangd b/.clangd new file mode 100644 index 00000000..0e4c84bd --- /dev/null +++ b/.clangd @@ -0,0 +1,62 @@ +# https://clangd.llvm.org/config + +# Apply a config conditionally to all C files +If: + PathMatch: .*\.(c|h)$ + +--- + +# Apply a config conditionally to all C++ files +If: + PathMatch: .*\.(c|h)pp + +--- + +# Apply a config conditionally to all CUDA files +If: + PathMatch: .*\.cuh? +CompileFlags: + Add: + # Allow variadic CUDA functions + - "-Xclang=-fcuda-allow-variadic-functions" + +--- + +# Tweak the clangd parse settings for all files +CompileFlags: + Compiler: clang++ + CompilationDatabase: . + Add: + - -x + - cuda + # report all errors + - "-ferror-limit=0" + - "-ftemplate-backtrace-limit=0" + - "-std=c++17" + Remove: + # strip CUDA fatbin args + - "-Xfatbin*" + - "-Xcompiler*" + - "-Xcudafe*" + - "-rdc=*" + - "-gpu=*" + - "--diag_suppress*" + # strip CUDA arch flags + - "-gencode*" + - "--generate-code*" + # strip gcc's -fcoroutines + - -fcoroutines + # strip CUDA flags unknown to clang + - "-ccbin*" + - "--compiler-options*" + - "--expt-extended-lambda" + - "--expt-relaxed-constexpr" + - "-forward-unknown-to-host-compiler" + - "-Werror=cross-execution-space-call" +Diagnostics: + Suppress: + - "variadic_device_fn" + - "attributes_not_allowed" + # The NVHPC version of _NVCXX_EXPAND_PACK macro triggers this clang error. + # Temporarily suppressing it, but should probably fix + - "template_param_shadow" diff --git a/.devcontainer/README.md b/.devcontainer/README.md new file mode 100644 index 00000000..e84b5f39 --- /dev/null +++ b/.devcontainer/README.md @@ -0,0 +1,198 @@ +> **Note** +> The instructions in this README are specific to Linux development environments. Instructions for Windows are coming soon! + +[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/NVIDIA/cccl?quickstart=1&devcontainer_path=.devcontainer%2Fdevcontainer.json) + +# CCCL Dev Containers + +CCCL uses [Development Containers](https://containers.dev/) to provide consistent and convenient development environments for both local development and for CI. This guide covers setup in [Visual Studio Code](#quickstart-vscode-recommended) and [Docker](#quickstart-docker-manual-approach). The guide also provides additional instructions in case you want use WSL. + +## Table of Contents +1. [Quickstart: VSCode (Recommended)](#vscode) +2. [Quickstart: Docker (Manual Approach)](#docker) +3. [Quickstart: Using WSL](#wsl) + +## Quickstart: VSCode (Recommended) + +### Prerequisites +- [Visual Studio Code](https://code.visualstudio.com/) +- [Remote - Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) +- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) +- [Docker](https://docs.docker.com/engine/install/) - This is only for completeness because it should already be implicitly installed by the Dev Containers extension + +### Steps + +1. Clone the Repository + ```bash + git clone https://github.com/nvidia/cccl.git + ``` +2. Open the cloned directory in VSCode + +3. Launch a Dev Container by clicking the prompt suggesting to "Reopen in Container" + + ![Shows "Reopen in Container" prompt when opening the cccl directory in VScode.](./img/reopen_in_container.png) + + - Alternatively, use the Command Palette to start a Dev Container. Press `Ctrl+Shift+P` to open the Command Palette. Type "Remote-Containers: Reopen in Container" and select it. + + ![Shows "Reopen in Container" in command pallete.](./img/open_in_container_manual.png) + +4. Select an environment with the desired CTK and host compiler from the list: + + ![Shows list of available container environments.](./img/container_list.png) + +5. VSCode will initialize the selected Dev Container. This can take a few minutes the first time. + +6. Once initialized, the local `cccl/` directory is mirrored into the container to ensure any changes are persistent. + +7. Done! See the [contributing guide](../CONTRIBUTING.md#building-and-testing) for instructions on how to build and run tests. + +### (Optional) Authenticate with GitHub for `sccache` + +After starting the container, there will be a prompt to authenticate with GitHub. This grants access to a [`sccache`](https://github.com/mozilla/sccache) server shared with CI and greatly accelerates local build times. This is currently limited to NVIDIA employees belonging to the `NVIDIA` or `rapidsai` GitHub organizations. + +Without authentication to the remote server, `sccache` will still accelerate local builds by using a filesystem cache. + +Follow the instructions in the prompt as below and enter the one-time code at https://github.com/login/device + + ![Shows authentication with GitHub to access sccache bucket.](./img/github_auth.png) + +To manually trigger this authentication, execute the `devcontainer-utils-vault-s3-init` script within the container. + +For more information about the sccache configuration and authentication, see the documentation at [`rapidsai/devcontainers`](https://github.com/rapidsai/devcontainers/blob/branch-23.10/USAGE.md#build-caching-with-sccache). + +## Quickstart: Docker (Manual Approach) + +### Prerequisites +- [Docker](https://docs.docker.com/desktop/install/linux-install/) + +### Steps +1. Clone the repository and use the [`launch.sh`](./launch.sh) script to launch the default container environment + ```bash + git clone https://github.com/nvidia/cccl.git + cd cccl + ./.devcontainer/launch.sh --docker + ``` + This script starts an interactive shell as the `coder` user inside the container with the local `cccl/` directory mirrored into `/home/coder/cccl`. + + For specific environments, use the `--cuda` and `--host` options: + ```bassh + ./.devcontainer/launch.sh --docker --cuda 12.2 --host gcc10 + ``` + See `./.devcontainer/launch.sh --help` for more information. + +2. Done. See the [contributing guide](../CONTRIBUTING.md#building-and-testing) for instructions on how to build and run tests. + +## Available Environments + +CCCL provides environments for both the oldest and newest supported CUDA versions with all compatible host compilers. + +Look in the [`.devcontainer/`](.) directory to see the available configurations. The top-level [`devcontainer.json`](./devcontainer.json) serves as the default environment. All `devcontainer.json` files in the `cuda-` sub-directories are variations on this top-level file, with different base images for the different CUDA and host compiler versions. + +## VSCode Customization + +By default, CCCL's Dev Containers come with certain VSCode settings and extensions configured by default, as can be seen in the [`devcontainer.json`](./devcontainer.json) file. This can be further customized by users without needing to modify the `devcontainer.json` file directly. + +For extensions, the [`dev.containers.defaultExtensions` setting](https://code.visualstudio.com/docs/devcontainers/containers#_always-installed-extensions) allows listing extensions that will always be installed. + +For more general customizations, VSCode allows using a dotfile repository. See the [VSCode documentation](https://code.visualstudio.com/docs/devcontainers/containers#_personalizing-with-dotfile-repositories) for more information. + +## GitHub Codespaces + +[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/NVIDIA/cccl?quickstart=1&devcontainer_path=.devcontainer%2Fdevcontainer.json) + +One of the benefits of Dev Containers is that they integrate natively with [GitHub Codespaces](https://github.com/features/codespaces). Codespaces provide a VSCode development environment right in your browser running on a machine in the cloud. This provides a truly one-click, turnkey development environment where you can develop, build, and test with no other setup required. + +Click the badge above or [click here](https://codespaces.new/NVIDIA/cccl?quickstart=1&devcontainer_path=.devcontainer%2Fdevcontainer.json) to get started with CCCL's Dev Containers on Codespaces. This will start the default Dev Container environment. [Click here](https://github.com/codespaces/new?hide_repo_select=true&ref=main&repo=296416761&skip_quickstart=true) to start a Codespace with a particular environment and hardware configuration as shown: + + ![Shows configuring a Codespace with a custom environment](../docs/images/codespaces.png) + +## For Maintainers: The `make_devcontainers.sh` Script + +### Overview + +[`make_devcontainers.sh`](./make_devcontainers.sh) generates devcontainer configurations for the unique combinations of CUDA Toolkit (CTK) versions and host compilers in [`ci/matrix.yaml`](../ci/matrix.yaml). + +### How It Works: + +1. Parses the matrix from `ci/matrix.yaml`. +2. Use the top-level [`.devcontainer/devcontainer.json`](./devcontainer.json) as a template. For each unique combination of CTK version and host compiler, generate a corresponding `devcontainer.json` configuration, adjusting only the base Docker image to match the desired environment. +3. Place the generated configurations in the `.devcontainer` directory, organizing them into subdirectories following the naming convention `cuda-`. + +For more information, see the `.devcontainer/make_devcontainers.sh --help` message. + +**Note**: When adding or updating supported environments, modify `matrix.yaml` and then rerun this script to synchronize the `devcontainer` configurations. + +## Quickstart: Using WSL + +> [!NOTE] +> _Make sure you have the Nvidia driver installed on your Windows host before moving further_. Type in `nvidia-smi` for verification. + +### Install WSL on your Windows host + +> [!WARNING] +> Disclaimer: This guide was developed for WSL 2 on Windows 11. + +1. Launch a Windows terminal (_e.g. Powershell_) as an administrator. + +2. Install WSL 2 by running: +```bash +wsl --install +``` +This should probably install Ubuntu distro as a default. + +3. Restart your computer and run `wsl -l -v` on a Windows terminal to verify installation. + +

Install prerequisites and VS Code extensions

+ +4. Launch your WSL/Ubuntu terminal by running `wsl` in Powershell. + +5. Install the [WSL extension](ms-vscode-remote.remote-wsl) on VS Code. + + - `Ctrl + Shift + P` and select `WSL: Connect to WSL` (it will prompt you to install the WSL extension). + + - Make sure you are connected to WSL with VS Code by checking the bottom left corner of the VS Code window (should indicate "WSL: Ubuntu" in our case). + +6. Install the [Dev Containers extension](ms-vscode-remote.remote-containers) on VS Code. + + - In a vanilla system you should be prompted to install `Docker` at this point, accept it. If it hangs you might have to restart VS Code after that. + +7. Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html). **Make sure you install the WSL 2 version and not the native Linux one**. This builds on top of Docker so make sure you have Docker properly installed (run `docker --version`). + +8. Open `/etc/docker/daemon.json` from within your WSL system (if the file does not exist, create it) and add the following: + +```json +{ + "runtimes": { + "nvidia": { + "path": "nvidia-container-runtime", + "runtimeArgs": [] + } + } +} +``` + +then run `sudo systemctl restart docker.service`. + +--- +### Build CCCL in WSL using Dev Containers + +9. Still on your WSL terminal run `git clone https://github.com/NVIDIA/cccl.git` + + +10. Open the CCCL cloned repo in VS Code ( `Ctrl + Shift + P `, select `File: Open Folder...` and select the path where your CCCL clone is located). + +11. If prompted, choose `Reopen in Container`. + + - If you are not prompted just type `Ctrl + Shift + P` and `Dev Containers: Open Folder in Container ...`. + +12. Verify that Dev Container was configured properly by running `nvidia-smi` in your Dev Container terminal. For a proper configuration it is important for the steps in [Install prerequisites and VS Code extensions](#prereqs) to be followed in a precise order. + +From that point on, the guide aligns with our [existing Dev Containers native Linux guide](https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md) with just one minor potential alteration: + +13. If WSL was launched without the X-server enabled, when asked to "authenticate Git with your Github credentials", if you answer **Yes**, the browser might not open automatically, with the following error message. + +> Failed opening a web browser at https://github.com/login/device + exec: "xdg-open,x-www-browser,www-browser,wslview": executable file not found in $PATH + Please try entering the URL in your browser manually + +In that case type in the address manually in your web browser https://github.com/login/device and fill in the one-time code. diff --git a/.devcontainer/cuda11.1-gcc7/devcontainer.json b/.devcontainer/cuda11.1-gcc7/devcontainer.json new file mode 100644 index 00000000..9cffedae --- /dev/null +++ b/.devcontainer/cuda11.1-gcc7/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-gcc7-cuda11.1-ubuntu18.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda11.1-gcc7", + "CCCL_CUDA_VERSION": "11.1", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "7", + "CCCL_BUILD_INFIX": "cuda11.1-gcc7" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda11.1-gcc7" +} diff --git a/.devcontainer/cuda11.1-gcc8/devcontainer.json b/.devcontainer/cuda11.1-gcc8/devcontainer.json new file mode 100644 index 00000000..de336499 --- /dev/null +++ b/.devcontainer/cuda11.1-gcc8/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-gcc8-cuda11.1-ubuntu18.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda11.1-gcc8", + "CCCL_CUDA_VERSION": "11.1", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "8", + "CCCL_BUILD_INFIX": "cuda11.1-gcc8" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda11.1-gcc8" +} diff --git a/.devcontainer/cuda11.1-gcc9/devcontainer.json b/.devcontainer/cuda11.1-gcc9/devcontainer.json new file mode 100644 index 00000000..559bb50a --- /dev/null +++ b/.devcontainer/cuda11.1-gcc9/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-gcc9-cuda11.1-ubuntu18.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda11.1-gcc9", + "CCCL_CUDA_VERSION": "11.1", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "9", + "CCCL_BUILD_INFIX": "cuda11.1-gcc9" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda11.1-gcc9" +} diff --git a/.devcontainer/cuda11.1-llvm9/devcontainer.json b/.devcontainer/cuda11.1-llvm9/devcontainer.json new file mode 100644 index 00000000..602753c6 --- /dev/null +++ b/.devcontainer/cuda11.1-llvm9/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-llvm9-cuda11.1-ubuntu18.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda11.1-llvm9", + "CCCL_CUDA_VERSION": "11.1", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "9", + "CCCL_BUILD_INFIX": "cuda11.1-llvm9" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda11.1-llvm9" +} diff --git a/.devcontainer/cuda11.8-gcc11/devcontainer.json b/.devcontainer/cuda11.8-gcc11/devcontainer.json new file mode 100644 index 00000000..5e480245 --- /dev/null +++ b/.devcontainer/cuda11.8-gcc11/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-gcc11-cuda11.8-ubuntu22.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda11.8-gcc11", + "CCCL_CUDA_VERSION": "11.8", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "11", + "CCCL_BUILD_INFIX": "cuda11.8-gcc11" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda11.8-gcc11" +} diff --git a/.devcontainer/cuda12.0-gcc10/devcontainer.json b/.devcontainer/cuda12.0-gcc10/devcontainer.json new file mode 100644 index 00000000..68d5f8ca --- /dev/null +++ b/.devcontainer/cuda12.0-gcc10/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-gcc10-cuda12.0-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.0-gcc10", + "CCCL_CUDA_VERSION": "12.0", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "10", + "CCCL_BUILD_INFIX": "cuda12.0-gcc10" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.0-gcc10" +} diff --git a/.devcontainer/cuda12.0-gcc11/devcontainer.json b/.devcontainer/cuda12.0-gcc11/devcontainer.json new file mode 100644 index 00000000..f811a4a6 --- /dev/null +++ b/.devcontainer/cuda12.0-gcc11/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-gcc11-cuda12.0-ubuntu22.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.0-gcc11", + "CCCL_CUDA_VERSION": "12.0", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "11", + "CCCL_BUILD_INFIX": "cuda12.0-gcc11" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.0-gcc11" +} diff --git a/.devcontainer/cuda12.0-gcc12/devcontainer.json b/.devcontainer/cuda12.0-gcc12/devcontainer.json new file mode 100644 index 00000000..6f702f41 --- /dev/null +++ b/.devcontainer/cuda12.0-gcc12/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-gcc12-cuda12.0-ubuntu22.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.0-gcc12", + "CCCL_CUDA_VERSION": "12.0", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "12", + "CCCL_BUILD_INFIX": "cuda12.0-gcc12" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.0-gcc12" +} diff --git a/.devcontainer/cuda12.0-gcc7/devcontainer.json b/.devcontainer/cuda12.0-gcc7/devcontainer.json new file mode 100644 index 00000000..ca9ab6ce --- /dev/null +++ b/.devcontainer/cuda12.0-gcc7/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-gcc7-cuda12.0-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.0-gcc7", + "CCCL_CUDA_VERSION": "12.0", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "7", + "CCCL_BUILD_INFIX": "cuda12.0-gcc7" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.0-gcc7" +} diff --git a/.devcontainer/cuda12.0-gcc8/devcontainer.json b/.devcontainer/cuda12.0-gcc8/devcontainer.json new file mode 100644 index 00000000..387b53db --- /dev/null +++ b/.devcontainer/cuda12.0-gcc8/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-gcc8-cuda12.0-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.0-gcc8", + "CCCL_CUDA_VERSION": "12.0", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "8", + "CCCL_BUILD_INFIX": "cuda12.0-gcc8" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.0-gcc8" +} diff --git a/.devcontainer/cuda12.0-gcc9/devcontainer.json b/.devcontainer/cuda12.0-gcc9/devcontainer.json new file mode 100644 index 00000000..d2e01ba1 --- /dev/null +++ b/.devcontainer/cuda12.0-gcc9/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-gcc9-cuda12.0-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.0-gcc9", + "CCCL_CUDA_VERSION": "12.0", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "9", + "CCCL_BUILD_INFIX": "cuda12.0-gcc9" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.0-gcc9" +} diff --git a/.devcontainer/cuda12.0-llvm10/devcontainer.json b/.devcontainer/cuda12.0-llvm10/devcontainer.json new file mode 100644 index 00000000..c227e9a5 --- /dev/null +++ b/.devcontainer/cuda12.0-llvm10/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-llvm10-cuda12.0-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.0-llvm10", + "CCCL_CUDA_VERSION": "12.0", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "10", + "CCCL_BUILD_INFIX": "cuda12.0-llvm10" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.0-llvm10" +} diff --git a/.devcontainer/cuda12.0-llvm11/devcontainer.json b/.devcontainer/cuda12.0-llvm11/devcontainer.json new file mode 100644 index 00000000..a61ae4b5 --- /dev/null +++ b/.devcontainer/cuda12.0-llvm11/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-llvm11-cuda12.0-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.0-llvm11", + "CCCL_CUDA_VERSION": "12.0", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "11", + "CCCL_BUILD_INFIX": "cuda12.0-llvm11" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.0-llvm11" +} diff --git a/.devcontainer/cuda12.0-llvm12/devcontainer.json b/.devcontainer/cuda12.0-llvm12/devcontainer.json new file mode 100644 index 00000000..c63e4050 --- /dev/null +++ b/.devcontainer/cuda12.0-llvm12/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-llvm12-cuda12.0-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.0-llvm12", + "CCCL_CUDA_VERSION": "12.0", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "12", + "CCCL_BUILD_INFIX": "cuda12.0-llvm12" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.0-llvm12" +} diff --git a/.devcontainer/cuda12.0-llvm13/devcontainer.json b/.devcontainer/cuda12.0-llvm13/devcontainer.json new file mode 100644 index 00000000..5cd6163c --- /dev/null +++ b/.devcontainer/cuda12.0-llvm13/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-llvm13-cuda12.0-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.0-llvm13", + "CCCL_CUDA_VERSION": "12.0", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "13", + "CCCL_BUILD_INFIX": "cuda12.0-llvm13" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.0-llvm13" +} diff --git a/.devcontainer/cuda12.0-llvm14/devcontainer.json b/.devcontainer/cuda12.0-llvm14/devcontainer.json new file mode 100644 index 00000000..0fcae844 --- /dev/null +++ b/.devcontainer/cuda12.0-llvm14/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-llvm14-cuda12.0-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.0-llvm14", + "CCCL_CUDA_VERSION": "12.0", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "14", + "CCCL_BUILD_INFIX": "cuda12.0-llvm14" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.0-llvm14" +} diff --git a/.devcontainer/cuda12.0-llvm9/devcontainer.json b/.devcontainer/cuda12.0-llvm9/devcontainer.json new file mode 100644 index 00000000..6b9530e5 --- /dev/null +++ b/.devcontainer/cuda12.0-llvm9/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-llvm9-cuda12.0-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.0-llvm9", + "CCCL_CUDA_VERSION": "12.0", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "9", + "CCCL_BUILD_INFIX": "cuda12.0-llvm9" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.0-llvm9" +} diff --git a/.devcontainer/cuda12.6-gcc10/devcontainer.json b/.devcontainer/cuda12.6-gcc10/devcontainer.json new file mode 100644 index 00000000..2da4470b --- /dev/null +++ b/.devcontainer/cuda12.6-gcc10/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-gcc10-cuda12.6-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.6-gcc10", + "CCCL_CUDA_VERSION": "12.6", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "10", + "CCCL_BUILD_INFIX": "cuda12.6-gcc10" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.6-gcc10" +} diff --git a/.devcontainer/cuda12.6-gcc11/devcontainer.json b/.devcontainer/cuda12.6-gcc11/devcontainer.json new file mode 100644 index 00000000..2930279f --- /dev/null +++ b/.devcontainer/cuda12.6-gcc11/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-gcc11-cuda12.6-ubuntu22.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.6-gcc11", + "CCCL_CUDA_VERSION": "12.6", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "11", + "CCCL_BUILD_INFIX": "cuda12.6-gcc11" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.6-gcc11" +} diff --git a/.devcontainer/cuda12.6-gcc12/devcontainer.json b/.devcontainer/cuda12.6-gcc12/devcontainer.json new file mode 100644 index 00000000..c4774db4 --- /dev/null +++ b/.devcontainer/cuda12.6-gcc12/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-gcc12-cuda12.6-ubuntu22.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.6-gcc12", + "CCCL_CUDA_VERSION": "12.6", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "12", + "CCCL_BUILD_INFIX": "cuda12.6-gcc12" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.6-gcc12" +} diff --git a/.devcontainer/cuda12.6-gcc7/devcontainer.json b/.devcontainer/cuda12.6-gcc7/devcontainer.json new file mode 100644 index 00000000..1e731419 --- /dev/null +++ b/.devcontainer/cuda12.6-gcc7/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-gcc7-cuda12.6-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.6-gcc7", + "CCCL_CUDA_VERSION": "12.6", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "7", + "CCCL_BUILD_INFIX": "cuda12.6-gcc7" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.6-gcc7" +} diff --git a/.devcontainer/cuda12.6-gcc8/devcontainer.json b/.devcontainer/cuda12.6-gcc8/devcontainer.json new file mode 100644 index 00000000..92922c23 --- /dev/null +++ b/.devcontainer/cuda12.6-gcc8/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-gcc8-cuda12.6-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.6-gcc8", + "CCCL_CUDA_VERSION": "12.6", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "8", + "CCCL_BUILD_INFIX": "cuda12.6-gcc8" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.6-gcc8" +} diff --git a/.devcontainer/cuda12.6-gcc9/devcontainer.json b/.devcontainer/cuda12.6-gcc9/devcontainer.json new file mode 100644 index 00000000..f3f52237 --- /dev/null +++ b/.devcontainer/cuda12.6-gcc9/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-gcc9-cuda12.6-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.6-gcc9", + "CCCL_CUDA_VERSION": "12.6", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "9", + "CCCL_BUILD_INFIX": "cuda12.6-gcc9" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.6-gcc9" +} diff --git a/.devcontainer/cuda12.6-llvm10/devcontainer.json b/.devcontainer/cuda12.6-llvm10/devcontainer.json new file mode 100644 index 00000000..01e2d4ab --- /dev/null +++ b/.devcontainer/cuda12.6-llvm10/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-llvm10-cuda12.6-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.6-llvm10", + "CCCL_CUDA_VERSION": "12.6", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "10", + "CCCL_BUILD_INFIX": "cuda12.6-llvm10" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.6-llvm10" +} diff --git a/.devcontainer/cuda12.6-llvm11/devcontainer.json b/.devcontainer/cuda12.6-llvm11/devcontainer.json new file mode 100644 index 00000000..3ea9167a --- /dev/null +++ b/.devcontainer/cuda12.6-llvm11/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-llvm11-cuda12.6-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.6-llvm11", + "CCCL_CUDA_VERSION": "12.6", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "11", + "CCCL_BUILD_INFIX": "cuda12.6-llvm11" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.6-llvm11" +} diff --git a/.devcontainer/cuda12.6-llvm12/devcontainer.json b/.devcontainer/cuda12.6-llvm12/devcontainer.json new file mode 100644 index 00000000..6a8fd246 --- /dev/null +++ b/.devcontainer/cuda12.6-llvm12/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-llvm12-cuda12.6-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.6-llvm12", + "CCCL_CUDA_VERSION": "12.6", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "12", + "CCCL_BUILD_INFIX": "cuda12.6-llvm12" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.6-llvm12" +} diff --git a/.devcontainer/cuda12.6-llvm13/devcontainer.json b/.devcontainer/cuda12.6-llvm13/devcontainer.json new file mode 100644 index 00000000..722b8a1b --- /dev/null +++ b/.devcontainer/cuda12.6-llvm13/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-llvm13-cuda12.6-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.6-llvm13", + "CCCL_CUDA_VERSION": "12.6", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "13", + "CCCL_BUILD_INFIX": "cuda12.6-llvm13" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.6-llvm13" +} diff --git a/.devcontainer/cuda12.6-llvm14/devcontainer.json b/.devcontainer/cuda12.6-llvm14/devcontainer.json new file mode 100644 index 00000000..ca7ec344 --- /dev/null +++ b/.devcontainer/cuda12.6-llvm14/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-llvm14-cuda12.6-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.6-llvm14", + "CCCL_CUDA_VERSION": "12.6", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "14", + "CCCL_BUILD_INFIX": "cuda12.6-llvm14" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.6-llvm14" +} diff --git a/.devcontainer/cuda12.6-llvm15/devcontainer.json b/.devcontainer/cuda12.6-llvm15/devcontainer.json new file mode 100644 index 00000000..889c71a5 --- /dev/null +++ b/.devcontainer/cuda12.6-llvm15/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-llvm15-cuda12.6-ubuntu22.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.6-llvm15", + "CCCL_CUDA_VERSION": "12.6", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "15", + "CCCL_BUILD_INFIX": "cuda12.6-llvm15" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.6-llvm15" +} diff --git a/.devcontainer/cuda12.6-llvm16/devcontainer.json b/.devcontainer/cuda12.6-llvm16/devcontainer.json new file mode 100644 index 00000000..e93737d3 --- /dev/null +++ b/.devcontainer/cuda12.6-llvm16/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-llvm16-cuda12.6-ubuntu22.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.6-llvm16", + "CCCL_CUDA_VERSION": "12.6", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "16", + "CCCL_BUILD_INFIX": "cuda12.6-llvm16" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.6-llvm16" +} diff --git a/.devcontainer/cuda12.6-llvm17/devcontainer.json b/.devcontainer/cuda12.6-llvm17/devcontainer.json new file mode 100644 index 00000000..1f5e05dc --- /dev/null +++ b/.devcontainer/cuda12.6-llvm17/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-llvm17-cuda12.6-ubuntu22.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.6-llvm17", + "CCCL_CUDA_VERSION": "12.6", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "17", + "CCCL_BUILD_INFIX": "cuda12.6-llvm17" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.6-llvm17" +} diff --git a/.devcontainer/cuda12.6-llvm18/devcontainer.json b/.devcontainer/cuda12.6-llvm18/devcontainer.json new file mode 100644 index 00000000..6cbe548a --- /dev/null +++ b/.devcontainer/cuda12.6-llvm18/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-llvm18-cuda12.6-ubuntu22.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.6-llvm18", + "CCCL_CUDA_VERSION": "12.6", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "18", + "CCCL_BUILD_INFIX": "cuda12.6-llvm18" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.6-llvm18" +} diff --git a/.devcontainer/cuda12.6-llvm9/devcontainer.json b/.devcontainer/cuda12.6-llvm9/devcontainer.json new file mode 100644 index 00000000..9f97f1cd --- /dev/null +++ b/.devcontainer/cuda12.6-llvm9/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-llvm9-cuda12.6-ubuntu20.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.6-llvm9", + "CCCL_CUDA_VERSION": "12.6", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "9", + "CCCL_BUILD_INFIX": "cuda12.6-llvm9" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.6-llvm9" +} diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..c4774db4 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.12-cpp-gcc12-cuda12.6-ubuntu22.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.6-gcc12", + "CCCL_CUDA_VERSION": "12.6", + "CCCL_HOST_COMPILER": "gcc", + "CCCL_HOST_COMPILER_VERSION": "12", + "CCCL_BUILD_INFIX": "cuda12.6-gcc12" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.6-gcc12" +} diff --git a/.devcontainer/docker-entrypoint.sh b/.devcontainer/docker-entrypoint.sh new file mode 100755 index 00000000..0fd94876 --- /dev/null +++ b/.devcontainer/docker-entrypoint.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash + +# Maybe change the UID/GID of the container's non-root user to match the host's UID/GID + +: "${REMOTE_USER:="coder"}"; +: "${OLD_UID:=}"; +: "${OLD_GID:=}"; +: "${NEW_UID:=}"; +: "${NEW_GID:=}"; + +eval "$(sed -n "s/${REMOTE_USER}:[^:]*:\([^:]*\):\([^:]*\):[^:]*:\([^:]*\).*/OLD_UID=\1;OLD_GID=\2;HOME_FOLDER=\3/p" /etc/passwd)"; +eval "$(sed -n "s/\([^:]*\):[^:]*:${NEW_UID}:.*/EXISTING_USER=\1/p" /etc/passwd)"; +eval "$(sed -n "s/\([^:]*\):[^:]*:${NEW_GID}:.*/EXISTING_GROUP=\1/p" /etc/group)"; + +if [ -z "$OLD_UID" ]; then + echo "Remote user not found in /etc/passwd ($REMOTE_USER)."; + exec "$(pwd)/.devcontainer/nvbench-entrypoint.sh" "$@"; +elif [ "$OLD_UID" = "$NEW_UID" ] && [ "$OLD_GID" = "$NEW_GID" ]; then + echo "UIDs and GIDs are the same ($NEW_UID:$NEW_GID)."; + exec "$(pwd)/.devcontainer/nvbench-entrypoint.sh" "$@"; +elif [ "$OLD_UID" != "$NEW_UID" ] && [ -n "$EXISTING_USER" ]; then + echo "User with UID exists ($EXISTING_USER=$NEW_UID)."; + exec "$(pwd)/.devcontainer/nvbench-entrypoint.sh" "$@"; +else + if [ "$OLD_GID" != "$NEW_GID" ] && [ -n "$EXISTING_GROUP" ]; then + echo "Group with GID exists ($EXISTING_GROUP=$NEW_GID)."; + NEW_GID="$OLD_GID"; + fi + echo "Updating UID:GID from $OLD_UID:$OLD_GID to $NEW_UID:$NEW_GID."; + sed -i -e "s/\(${REMOTE_USER}:[^:]*:\)[^:]*:[^:]*/\1${NEW_UID}:${NEW_GID}/" /etc/passwd; + if [ "$OLD_GID" != "$NEW_GID" ]; then + sed -i -e "s/\([^:]*:[^:]*:\)${OLD_GID}:/\1${NEW_GID}:/" /etc/group; + fi + + # Fast parallel `chown -R` + find "$HOME_FOLDER/" -not -user "$REMOTE_USER" -print0 \ + | xargs -0 -r -n1 -P"$(nproc --all)" chown "$NEW_UID:$NEW_GID" + + # Run the container command as $REMOTE_USER, preserving the container startup environment. + # + # We cannot use `su -w` because that's not supported by the `su` in Ubuntu18.04, so we reset the following + # environment variables to the expected values, then pass through everything else from the startup environment. + export HOME="$HOME_FOLDER"; + export XDG_CACHE_HOME="$HOME_FOLDER/.cache"; + export XDG_CONFIG_HOME="$HOME_FOLDER/.config"; + export XDG_STATE_HOME="$HOME_FOLDER/.local/state"; + export PYTHONHISTFILE="$HOME_FOLDER/.local/state/.python_history"; + exec su -p "$REMOTE_USER" -- "$(pwd)/.devcontainer/nvbench-entrypoint.sh" "$@"; +fi diff --git a/.devcontainer/img/container_list.png b/.devcontainer/img/container_list.png new file mode 100644 index 00000000..09c4510f Binary files /dev/null and b/.devcontainer/img/container_list.png differ diff --git a/.devcontainer/img/github_auth.png b/.devcontainer/img/github_auth.png new file mode 100644 index 00000000..3f52b3a2 Binary files /dev/null and b/.devcontainer/img/github_auth.png differ diff --git a/.devcontainer/img/open_in_container_manual.png b/.devcontainer/img/open_in_container_manual.png new file mode 100644 index 00000000..e09435b8 Binary files /dev/null and b/.devcontainer/img/open_in_container_manual.png differ diff --git a/.devcontainer/img/reopen_in_container.png b/.devcontainer/img/reopen_in_container.png new file mode 100644 index 00000000..0e1d82dd Binary files /dev/null and b/.devcontainer/img/reopen_in_container.png differ diff --git a/.devcontainer/launch.sh b/.devcontainer/launch.sh new file mode 100755 index 00000000..a9ef143c --- /dev/null +++ b/.devcontainer/launch.sh @@ -0,0 +1,307 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Ensure the script is being executed in the nvbench/ root +cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/.."; + +print_help() { + echo "Usage: $0 [-c|--cuda ] [-H|--host ] [-d|--docker]" + echo "Launch a development container. If no CUDA version or Host compiler are specified," + echo "the top-level devcontainer in .devcontainer/devcontainer.json will be used." + echo "" + echo "Options:" + echo " -c, --cuda Specify the CUDA version. E.g., 12.2" + echo " -H, --host Specify the host compiler. E.g., gcc12" + echo " -d, --docker Launch the development environment in Docker directly without using VSCode." + echo " --gpus gpu-request GPU devices to add to the container ('all' to pass all GPUs)." + echo " -e, --env list Set additional container environment variables." + echo " -v, --volume list Bind mount a volume." + echo " -h, --help Display this help message and exit." +} + +# Assign variable one scope above the caller +# Usage: local "$1" && _upvar $1 "value(s)" +# Param: $1 Variable name to assign value to +# Param: $* Value(s) to assign. If multiple values, an array is +# assigned, otherwise a single value is assigned. +# See: http://fvue.nl/wiki/Bash:_Passing_variables_by_reference +_upvar() { + if unset -v "$1"; then + if (( $# == 2 )); then + eval $1=\"\$2\"; + else + eval $1=\(\"\${@:2}\"\); + fi; + fi +} + +parse_options() { + local -; + set -euo pipefail; + + # Read the name of the variable in which to return unparsed arguments + local UNPARSED="${!#}"; + # Splice the unparsed arguments variable name from the arguments list + set -- "${@:1:$#-1}"; + + local OPTIONS=c:e:H:dhv + local LONG_OPTIONS=cuda:,env:,host:,gpus:,volume:,docker,help + # shellcheck disable=SC2155 + local PARSED_OPTIONS=$(getopt -n "$0" -o "${OPTIONS}" --long "${LONG_OPTIONS}" -- "$@") + + # shellcheck disable=SC2181 + if [[ $? -ne 0 ]]; then + exit 1 + fi + + eval set -- "${PARSED_OPTIONS}" + + while true; do + case "$1" in + -c|--cuda) + cuda_version="$2" + shift 2 + ;; + -e|--env) + env_vars+=("$1" "$2") + shift 2 + ;; + -H|--host) + host_compiler="$2" + shift 2 + ;; + --gpus) + gpu_request="$2" + shift 2 + ;; + -d|--docker) + docker_mode=true + shift + ;; + -h|--help) + print_help + exit 0 + ;; + -v|--volume) + volumes+=("$1" "$2") + shift 2 + ;; + --) + shift + _upvar "${UNPARSED}" "${@}" + break + ;; + *) + echo "Invalid option: $1" + print_help + exit 1 + ;; + esac + done +} + +# shellcheck disable=SC2155 +launch_docker() { + local -; + set -euo pipefail + + inline_vars() { + cat - \ + `# inline local workspace folder` \ + | sed "s@\${localWorkspaceFolder}@$(pwd)@g" \ + `# inline local workspace folder basename` \ + | sed "s@\${localWorkspaceFolderBasename}@$(basename "$(pwd)")@g" \ + `# inline container workspace folder` \ + | sed "s@\${containerWorkspaceFolder}@${WORKSPACE_FOLDER:-}@g" \ + `# inline container workspace folder basename` \ + | sed "s@\${containerWorkspaceFolderBasename}@$(basename "${WORKSPACE_FOLDER:-}")@g" \ + `# translate local envvars to shell syntax` \ + | sed -r 's/\$\{localEnv:([^\:]*):?(.*)\}/${\1:-\2}/g' + } + + args_to_path() { + local -a keys=("${@}") + keys=("${keys[@]/#/[}") + keys=("${keys[@]/%/]}") + echo "$(IFS=; echo "${keys[*]}")" + } + + json_string() { + python3 -c "import json,sys; print(json.load(sys.stdin)$(args_to_path "${@}"))" 2>/dev/null | inline_vars + } + + json_array() { + python3 -c "import json,sys; [print(f'\"{x}\"') for x in json.load(sys.stdin)$(args_to_path "${@}")]" 2>/dev/null | inline_vars + } + + json_map() { + python3 -c "import json,sys; [print(f'{k}=\"{v}\"') for k,v in json.load(sys.stdin)$(args_to_path "${@}").items()]" 2>/dev/null | inline_vars + } + + devcontainer_metadata_json() { + docker inspect --type image --format '{{json .Config.Labels}}' "$DOCKER_IMAGE" \ + | json_string '"devcontainer.metadata"' + } + + ### + # Read relevant values from devcontainer.json + ### + + local devcontainer_json="${path}/devcontainer.json"; + + # Read image + local DOCKER_IMAGE="$(json_string '"image"' < "${devcontainer_json}")" + # Always pull the latest copy of the image + docker pull "$DOCKER_IMAGE" + + # Read workspaceFolder + local WORKSPACE_FOLDER="$(json_string '"workspaceFolder"' < "${devcontainer_json}")" + # Read remoteUser + local REMOTE_USER="$(json_string '"remoteUser"' < "${devcontainer_json}")" + # If remoteUser isn't in our devcontainer.json, read it from the image's "devcontainer.metadata" label + if test -z "${REMOTE_USER:-}"; then + REMOTE_USER="$(devcontainer_metadata_json | json_string "-1" '"remoteUser"')" + fi + # Read runArgs + local -a RUN_ARGS="($(json_array '"runArgs"' < "${devcontainer_json}"))" + # Read initializeCommand + local -a INITIALIZE_COMMAND="($(json_array '"initializeCommand"' < "${devcontainer_json}"))" + # Read containerEnv + local -a ENV_VARS="($(json_map '"containerEnv"' < "${devcontainer_json}" | sed -r 's/(.*)=(.*)/--env \1=\2/'))" + # Read mounts + local -a MOUNTS="($( + tee < "${devcontainer_json}" \ + 1>/dev/null \ + >(json_array '"mounts"') \ + >(json_string '"workspaceMount"') \ + | xargs -r -I% echo --mount '%' + ))" + + ### + # Update run arguments and container environment variables + ### + + # Only pass `-it` if the shell is a tty + if ! ${CI:-'false'} && tty >/dev/null 2>&1 && (exec /dev/null 2>&1; then + RUN_ARGS+=(--gpus all) + fi + fi + + RUN_ARGS+=(--workdir "${WORKSPACE_FOLDER:-/home/coder/nvbench}") + + if test -n "${REMOTE_USER:-}"; then + ENV_VARS+=(--env NEW_UID="$(id -u)") + ENV_VARS+=(--env NEW_GID="$(id -g)") + ENV_VARS+=(--env REMOTE_USER="$REMOTE_USER") + RUN_ARGS+=(-u root:root) + RUN_ARGS+=(--entrypoint "${WORKSPACE_FOLDER:-/home/coder/nvbench}/.devcontainer/docker-entrypoint.sh") + fi + + if test -n "${SSH_AUTH_SOCK:-}"; then + ENV_VARS+=(--env "SSH_AUTH_SOCK=/tmp/ssh-auth-sock") + MOUNTS+=(--mount "source=${SSH_AUTH_SOCK},target=/tmp/ssh-auth-sock,type=bind") + fi + + # Append user-provided volumes + if test -v volumes && test ${#volumes[@]} -gt 0; then + MOUNTS+=("${volumes[@]}") + fi + + # Append user-provided envvars + if test -v env_vars && test ${#env_vars[@]} -gt 0; then + ENV_VARS+=("${env_vars[@]}") + fi + + # Run the initialize command before starting the container + if test "${#INITIALIZE_COMMAND[@]}" -gt 0; then + eval "${INITIALIZE_COMMAND[*]@Q}" + fi + + exec docker run \ + "${RUN_ARGS[@]}" \ + "${ENV_VARS[@]}" \ + "${MOUNTS[@]}" \ + "${DOCKER_IMAGE}" \ + "$@" +} + +launch_vscode() { + local -; + set -euo pipefail; + # Since Visual Studio Code allows only one instance per `devcontainer.json`, + # this code prepares a unique temporary directory structure for each launch of a devcontainer. + # By doing so, it ensures that multiple instances of the same environment can be run + # simultaneously. The script replicates the `devcontainer.json` from the desired CUDA + # and compiler environment into this temporary directory, adjusting paths to ensure the + # correct workspace is loaded. A special URL is then generated to instruct VSCode to + # launch the development container using this temporary configuration. + local workspace="$(basename "$(pwd)")" + local tmpdir="$(mktemp -d)/${workspace}" + mkdir -p "${tmpdir}" + mkdir -p "${tmpdir}/.devcontainer" + cp -arL "${path}/devcontainer.json" "${tmpdir}/.devcontainer" + sed -i "s@\\${localWorkspaceFolder}@$(pwd)@g" "${tmpdir}/.devcontainer/devcontainer.json" + local path="${tmpdir}" + local hash="$(echo -n "${path}" | xxd -pu - | tr -d '[:space:]')" + local url="vscode://vscode-remote/dev-container+${hash}/home/coder/nvbench" + + local launch="" + if type open >/dev/null 2>&1; then + launch="open" + elif type xdg-open >/dev/null 2>&1; then + launch="xdg-open" + fi + + if [ -n "${launch}" ]; then + echo "Launching VSCode Dev Container URL: ${url}" + code --new-window "${tmpdir}" + exec "${launch}" "${url}" >/dev/null 2>&1 + fi +} + +main() { + local -a unparsed; + parse_options "$@" unparsed; + set -- "${unparsed[@]}"; + + # If no CTK/Host compiler are provided, just use the default environment + if [[ -z ${cuda_version:-} ]] && [[ -z ${host_compiler:-} ]]; then + path=".devcontainer" + else + path=".devcontainer/cuda${cuda_version}-${host_compiler}" + if [[ ! -f "${path}/devcontainer.json" ]]; then + echo "Unknown CUDA [${cuda_version}] compiler [${host_compiler}] combination" + echo "Requested devcontainer ${path}/devcontainer.json does not exist" + exit 1 + fi + fi + + if ${docker_mode:-'false'}; then + launch_docker "$@" + else + launch_vscode + fi +} + +main "$@" + diff --git a/.devcontainer/make_devcontainers.sh b/.devcontainer/make_devcontainers.sh new file mode 100755 index 00000000..f868cc14 --- /dev/null +++ b/.devcontainer/make_devcontainers.sh @@ -0,0 +1,144 @@ +#!/bin/bash + +# This script parses the CI matrix.yaml file and generates a devcontainer.json file for each unique combination of +# CUDA version, compiler name/version, and Ubuntu version. The devcontainer.json files are written to the +# .devcontainer directory to a subdirectory named after the CUDA version and compiler name/version. +# GitHub docs on using multiple devcontainer.json files: +# https://docs.github.com/en/codespaces/setting-up-your-project-for-codespaces/adding-a-dev-container-configuration/introduction-to-dev-containers#devcontainerjson + +set -euo pipefail + +# Ensure the script is being executed in its containing directory +cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"; + + +function usage { + echo "Usage: $0 [--clean] [-h/--help] [-v/--verbose]" + echo " --clean Remove stale devcontainer subdirectories" + echo " -h, --help Display this help message" + echo " -v, --verbose Enable verbose mode (set -x)" + exit 1 +} + +# Function to update the devcontainer.json file with the provided parameters +update_devcontainer() { + local input_file="$1" + local output_file="$2" + local name="$3" + local cuda_version="$4" + local compiler_name="$5" + local compiler_exe="$6" + local compiler_version="$7" + local os="$8" + local devcontainer_version="$9" + + local IMAGE_ROOT="rapidsai/devcontainers:${devcontainer_version}-cpp-" + local image="${IMAGE_ROOT}${compiler_name}${compiler_version}-cuda${cuda_version}-${os}" + + jq --arg image "$image" --arg name "$name" \ + --arg cuda_version "$cuda_version" --arg compiler_name "$compiler_name" \ + --arg compiler_exe "$compiler_exe" --arg compiler_version "$compiler_version" --arg os "$os" \ + '.image = $image | .name = $name | .containerEnv.DEVCONTAINER_NAME = $name | + .containerEnv.CCCL_BUILD_INFIX = $name | + .containerEnv.CCCL_CUDA_VERSION = $cuda_version | .containerEnv.CCCL_HOST_COMPILER = $compiler_name | + .containerEnv.CCCL_HOST_COMPILER_VERSION = $compiler_version '\ + "$input_file" > "$output_file" +} + +make_name() { + local cuda_version="$1" + local compiler_name="$2" + local compiler_version="$3" + + echo "cuda$cuda_version-$compiler_name$compiler_version" +} + +CLEAN=false +VERBOSE=false +while [[ $# -gt 0 ]]; do + case "$1" in + --clean) + CLEAN=true + ;; + -h|--help) + usage + ;; + -v|--verbose) + VERBOSE=true + ;; + *) + usage + ;; + esac + shift +done + +MATRIX_FILE="../ci/matrix.yaml" + +# Enable verbose mode if requested +if [ "$VERBOSE" = true ]; then + set -x + cat ${MATRIX_FILE} +fi + +# Read matrix.yaml and convert it to json +matrix_json=$(yq -o json ${MATRIX_FILE}) + +# Exclude Windows environments +readonly matrix_json=$(echo "$matrix_json" | jq 'del(.pull_request.nvcc[] | select(.os | contains("windows")))') + +# Get the devcontainer image version and define image tag root +readonly DEVCONTAINER_VERSION=$(echo "$matrix_json" | jq -r '.devcontainer_version') + +# Get unique combinations of cuda version, compiler name/version, and Ubuntu version +readonly combinations=$(echo "$matrix_json" | jq -c '[.pull_request.nvcc[] | {cuda: .cuda, compiler_name: .compiler.name, compiler_exe: .compiler.exe, compiler_version: .compiler.version, os: .os}] | unique | .[]') + +# Update the base devcontainer with the default values +# The root devcontainer.json file is used as the default container as well as a template for all +# other devcontainer.json files by replacing the `image:` field with the appropriate image name +readonly base_devcontainer_file="./devcontainer.json" +readonly NEWEST_GCC_CUDA_ENTRY=$(echo "$combinations" | jq -rs '[.[] | select(.compiler_name == "gcc")] | sort_by((.cuda | tonumber), (.compiler_version | tonumber)) | .[-1]') +readonly DEFAULT_CUDA=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.cuda') +readonly DEFAULT_COMPILER_NAME=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.compiler_name') +readonly DEFAULT_COMPILER_EXE=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.compiler_exe') +readonly DEFAULT_COMPILER_VERSION=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.compiler_version') +readonly DEFAULT_OS=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.os') +readonly DEFAULT_NAME=$(make_name "$DEFAULT_CUDA" "$DEFAULT_COMPILER_NAME" "$DEFAULT_COMPILER_VERSION") + +update_devcontainer ${base_devcontainer_file} "./temp_devcontainer.json" "$DEFAULT_NAME" "$DEFAULT_CUDA" "$DEFAULT_COMPILER_NAME" "$DEFAULT_COMPILER_EXE" "$DEFAULT_COMPILER_VERSION" "$DEFAULT_OS" "$DEVCONTAINER_VERSION" +mv "./temp_devcontainer.json" ${base_devcontainer_file} + +# Create an array to keep track of valid subdirectory names +valid_subdirs=() + +# The img folder should not be removed: +valid_subdirs+=("img") + +# For each unique combination +for combination in $combinations; do + cuda_version=$(echo "$combination" | jq -r '.cuda') + compiler_name=$(echo "$combination" | jq -r '.compiler_name') + compiler_exe=$(echo "$combination" | jq -r '.compiler_exe') + compiler_version=$(echo "$combination" | jq -r '.compiler_version') + os=$(echo "$combination" | jq -r '.os') + + name=$(make_name "$cuda_version" "$compiler_name" "$compiler_version") + mkdir -p "$name" + new_devcontainer_file="$name/devcontainer.json" + + update_devcontainer "$base_devcontainer_file" "$new_devcontainer_file" "$name" "$cuda_version" "$compiler_name" "$compiler_exe" "$compiler_version" "$os" "$DEVCONTAINER_VERSION" + echo "Created $new_devcontainer_file" + + # Add the subdirectory name to the valid_subdirs array + valid_subdirs+=("$name") +done + +# Clean up stale subdirectories and devcontainer.json files +if [ "$CLEAN" = true ]; then + for subdir in ./*; do + if [ -d "$subdir" ] && [[ ! " ${valid_subdirs[@]} " =~ " ${subdir#./} " ]]; then + echo "Removing stale subdirectory: $subdir" + rm -r "$subdir" + fi + done +fi diff --git a/.devcontainer/nvbench-entrypoint.sh b/.devcontainer/nvbench-entrypoint.sh new file mode 100755 index 00000000..8cf81c16 --- /dev/null +++ b/.devcontainer/nvbench-entrypoint.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +# shellcheck disable=SC1091 + +set -e; + +devcontainer-utils-post-create-command; +devcontainer-utils-init-git; +devcontainer-utils-post-attach-command; + +cd /home/coder/nvbench/ + +if test $# -gt 0; then + exec "$@"; +else + exec /bin/bash -li; +fi diff --git a/.devcontainer/verify_devcontainer.sh b/.devcontainer/verify_devcontainer.sh new file mode 100755 index 00000000..b5934ea2 --- /dev/null +++ b/.devcontainer/verify_devcontainer.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +function usage { + echo "Usage: $0" + echo + echo "This script is intended to be run within one of CCCL's Dev Containers." + echo "It verifies that the expected environment variables and binary versions match what is expected." +} + +check_envvars() { + for var_name in "$@"; do + if [[ -z "${!var_name:-}" ]]; then + echo "::error:: ${var_name} variable is not set." + exit 1 + else + echo "$var_name=${!var_name}" + fi + done +} + +check_host_compiler_version() { + local version_output=$($CXX --version) + + if [[ "$CXX" == "g++" ]]; then + local actual_version=$(echo "$version_output" | head -n 1 | cut -d ' ' -f 4 | cut -d '.' -f 1) + local expected_compiler="gcc" + elif [[ "$CXX" == "clang++" ]]; then + if [[ $version_output =~ clang\ version\ ([0-9]+) ]]; then + actual_version=${BASH_REMATCH[1]} + else + echo "::error:: Unable to determine clang version." + exit 1 + fi + expected_compiler="llvm" + elif [[ "$CXX" == "icpc" ]]; then + local actual_version=$(echo "$version_output" | head -n 1 | cut -d ' ' -f 3 ) + # The icpc compiler version of oneAPI release 2023.2.0 is 2021.10.0 + if [[ "$actual_version" == "2021.10.0" ]]; then + actual_version="2023.2.0" + fi + expected_compiler="oneapi" + else + echo "::error:: Unexpected CXX value ($CXX)." + exit 1 + fi + + if [[ "$expected_compiler" != "${CCCL_HOST_COMPILER}" || "$actual_version" != "$CCCL_HOST_COMPILER_VERSION" ]]; then + echo "::error:: CXX ($CXX) version ($actual_version) does not match the expected compiler (${CCCL_HOST_COMPILER}) and version (${CCCL_HOST_COMPILER_VERSION})." + exit 1 + else + echo "Detected host compiler: $CXX version $actual_version" + fi +} + +check_cuda_version() { + local cuda_version_output=$(nvcc --version) + if [[ $cuda_version_output =~ release\ ([0-9]+\.[0-9]+) ]]; then + local actual_cuda_version=${BASH_REMATCH[1]} + else + echo "::error:: Unable to determine CUDA version from nvcc." + exit 1 + fi + + if [[ "$actual_cuda_version" != "$CCCL_CUDA_VERSION" ]]; then + echo "::error:: CUDA version ($actual_cuda_version) does not match the expected CUDA version ($CCCL_CUDA_VERSION)." + exit 1 + else + echo "Detected CUDA version: $actual_cuda_version" + fi +} + +main() { + if [[ "$1" == "-h" || "$1" == "--help" ]]; then + usage + exit 0 + fi + + set -euo pipefail + + check_envvars DEVCONTAINER_NAME CXX CUDAHOSTCXX CCCL_BUILD_INFIX CCCL_HOST_COMPILER CCCL_CUDA_VERSION CCCL_HOST_COMPILER_VERSION + + check_host_compiler_version + + check_cuda_version + + echo "Dev Container successfully verified!" +} + +main "$@" diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..11130409 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,17 @@ +# Exclude these commits from git-blame and similar tools. +# +# To use this file, run the following command from the repo root: +# +# ``` +# $ git config blame.ignoreRevsFile .git-blame-ignore-revs +# ``` +# +# Include a brief comment with each commit added, for example: +# +# ``` +# 8f1152d4a22287a35be2dde596e3cf86ace8054a # Increase column limit to 100 +# ``` +# +# Only add commits that are pure formatting changes (e.g. clang-format version changes, etc). +8f1152d4a22287a35be2dde596e3cf86ace8054a # Increase column limit to 100 + diff --git a/.github/actions/compute-matrix/action.yml b/.github/actions/compute-matrix/action.yml new file mode 100644 index 00000000..b8155e7a --- /dev/null +++ b/.github/actions/compute-matrix/action.yml @@ -0,0 +1,25 @@ + +name: Compute Matrix +description: "Compute the matrix for a given matrix type from the specified matrix file" + +inputs: + matrix_query: + description: "The jq query used to specify the desired matrix. e.g., .pull_request.nvcc" + required: true + matrix_file: + description: 'The file containing the matrix' + required: true +outputs: + matrix: + description: 'The requested matrix' + value: ${{ steps.compute-matrix.outputs.MATRIX }} + +runs: + using: "composite" + steps: + - name: Compute matrix + id: compute-matrix + run: | + MATRIX=$(./.github/actions/compute-matrix/compute-matrix.sh ${{inputs.matrix_file}} ${{inputs.matrix_query}} ) + echo "matrix=$MATRIX" | tee -a $GITHUB_OUTPUT + shell: bash -euxo pipefail {0} diff --git a/.github/actions/compute-matrix/compute-matrix.sh b/.github/actions/compute-matrix/compute-matrix.sh new file mode 100755 index 00000000..cd3946f1 --- /dev/null +++ b/.github/actions/compute-matrix/compute-matrix.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +set -euo pipefail + +write_output() { + local key="$1" + local value="$2" + echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}" +} + +extract_matrix() { + local file="$1" + local type="$2" + local matrix=$(yq -o=json "$file" | jq -cr ".$type") + write_output "DEVCONTAINER_VERSION" "$(yq -o json "$file" | jq -cr '.devcontainer_version')" + + local nvcc_full_matrix="$(echo "$matrix" | jq -cr '.nvcc')" + local per_cuda_compiler_matrix="$(echo "$nvcc_full_matrix" | jq -cr ' group_by(.cuda + .compiler.name) | map({(.[0].cuda + "-" + .[0].compiler.name): .}) | add')" + write_output "PER_CUDA_COMPILER_MATRIX" "$per_cuda_compiler_matrix" + write_output "PER_CUDA_COMPILER_KEYS" "$(echo "$per_cuda_compiler_matrix" | jq -r 'keys | @json')" +} + +main() { + if [ "$1" == "-v" ]; then + set -x + shift + fi + + if [ $# -ne 2 ] || [ "$2" != "pull_request" ]; then + echo "Usage: $0 [-v] MATRIX_FILE MATRIX_TYPE" + echo " -v : Enable verbose output" + echo " MATRIX_FILE : The path to the matrix file." + echo " MATRIX_TYPE : The desired matrix. Supported values: 'pull_request'" + exit 1 + fi + + echo "Input matrix file:" >&2 + cat "$1" >&2 + echo "Matrix Type: $2" >&2 + + extract_matrix "$1" "$2" +} + +main "$@" diff --git a/.github/actions/configure_cccl_sccache/action.yml b/.github/actions/configure_cccl_sccache/action.yml new file mode 100644 index 00000000..e0ea2707 --- /dev/null +++ b/.github/actions/configure_cccl_sccache/action.yml @@ -0,0 +1,13 @@ +name: Set up AWS credentials and environment variables for sccache +description: "Set up AWS credentials and environment variables for sccache" +runs: + using: "composite" + steps: + - name: Set environment variables + run: | + echo "SCCACHE_BUCKET=rapids-sccache-devs" >> $GITHUB_ENV + echo "SCCACHE_REGION=us-east-2" >> $GITHUB_ENV + echo "SCCACHE_IDLE_TIMEOUT=32768" >> $GITHUB_ENV + echo "SCCACHE_S3_USE_SSL=true" >> $GITHUB_ENV + echo "SCCACHE_S3_NO_CREDENTIALS=false" >> $GITHUB_ENV + shell: bash diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml new file mode 100644 index 00000000..895ba83e --- /dev/null +++ b/.github/copy-pr-bot.yaml @@ -0,0 +1,4 @@ +# Configuration file for `copy-pr-bot` GitHub App +# https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/ + +enabled: true diff --git a/.github/problem-matchers/problem-matcher.json b/.github/problem-matchers/problem-matcher.json new file mode 100644 index 00000000..f196a5c8 --- /dev/null +++ b/.github/problem-matchers/problem-matcher.json @@ -0,0 +1,14 @@ +{ + "problemMatcher": [ + { + "owner": "nvcc", + "pattern": [ + { + "regexp": "^\\/home\\/coder\\/(.+):(\\d+):(\\d+): (\\w+): \"(.+)\"$", + "severity": 4, + "message": 5 + } + ] + } + ] +} diff --git a/.github/workflows/build-and-test-linux.yml b/.github/workflows/build-and-test-linux.yml new file mode 100644 index 00000000..e2364d7f --- /dev/null +++ b/.github/workflows/build-and-test-linux.yml @@ -0,0 +1,36 @@ +name: build and test + +defaults: + run: + shell: bash -exo pipefail {0} + +on: + workflow_call: + inputs: + cuda: {type: string, required: true} + host: {type: string, required: true} + cpu: {type: string, required: true} + test_name: {type: string, required: false} + build_script: {type: string, required: false} + test_script: {type: string, required: false} + container_image: {type: string, required: false} + run_tests: {type: boolean, required: false, default: true} + +permissions: + contents: read + +jobs: + build-and-test: + name: Build/Test ${{inputs.test_name}} + permissions: + id-token: write + contents: read + uses: ./.github/workflows/run-as-coder.yml + with: + cuda: ${{ inputs.cuda }} + host: ${{ inputs.host }} + name: Build/Test ${{inputs.test_name}} + runner: linux-${{inputs.cpu}}-gpu-v100-latest-1 + image: ${{ inputs.container_image }} + command: | + ${{ inputs.test_script }} diff --git a/.github/workflows/build-and-test-windows.yml b/.github/workflows/build-and-test-windows.yml new file mode 100644 index 00000000..2cabf9d0 --- /dev/null +++ b/.github/workflows/build-and-test-windows.yml @@ -0,0 +1,49 @@ +name: Build Windows + +on: + workflow_call: + inputs: + test_name: {type: string, required: false} + build_script: {type: string, required: false} + test_script: {type: string, required: false} + container_image: {type: string, required: false} + +jobs: + prepare: + name: Build Only ${{inputs.test_name}} + runs-on: windows-amd64-cpu16 + permissions: + id-token: write + contents: read + env: + SCCACHE_BUCKET: rapids-sccache-devs + SCCACHE_REGION: us-east-2 + SCCACHE_IDLE_TIMEOUT: 0 + SCCACHE_S3_USE_SSL: true + SCCACHE_S3_NO_CREDENTIALS: false + steps: + - name: Get AWS credentials for sccache bucket + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA + aws-region: us-east-2 + role-duration-seconds: 43200 # 12 hours + - name: Fetch ${{ inputs.container_image }} + shell: powershell + run: docker pull ${{ inputs.container_image }} + - name: Run the tests + shell: powershell + run: >- + docker run ${{ inputs.container_image }} powershell -c "[System.Environment]::SetEnvironmentVariable('AWS_ACCESS_KEY_ID','${{env.AWS_ACCESS_KEY_ID}}') + [System.Environment]::SetEnvironmentVariable('AWS_SECRET_ACCESS_KEY','${{env.AWS_SECRET_ACCESS_KEY}}') + [System.Environment]::SetEnvironmentVariable('AWS_SESSION_TOKEN','${{env.AWS_SESSION_TOKEN }}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_BUCKET','${{env.SCCACHE_BUCKET}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_REGION','${{env.SCCACHE_REGION}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_IDLE_TIMEOUT','${{env.SCCACHE_IDLE_TIMEOUT}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_USE_SSL','${{env.SCCACHE_S3_USE_SSL}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_NO_CREDENTIALS','${{env.SCCACHE_S3_NO_CREDENTIALS}}') + git clone https://github.com/NVIDIA/nvbench.git; + cd nvbench; + git fetch --all; + git checkout ${{github.ref_name}}; + ${{inputs.build_script}};" diff --git a/.github/workflows/dispatch-build-and-test.yml b/.github/workflows/dispatch-build-and-test.yml new file mode 100644 index 00000000..22ac2b2c --- /dev/null +++ b/.github/workflows/dispatch-build-and-test.yml @@ -0,0 +1,53 @@ +name: Dispatch build and test + +on: + workflow_call: + inputs: + project_name: {type: string, required: true} + per_cuda_compiler_matrix: {type: string, required: true} + devcontainer_version: {type: string, required: true} + is_windows: {type: boolean, required: true} + +permissions: + contents: read + +jobs: + # Using a matrix to dispatch to the build-and-test reusable workflow for each build configuration + # ensures that the build/test steps can overlap across different configurations. For example, + # the build step for CUDA 12.1 + gcc 9.3 can run at the same time as the test step for CUDA 11.0 + clang 11. + build_and_test_linux: + name: build and test linux + permissions: + id-token: write + contents: read + if: ${{ !inputs.is_windows }} + uses: ./.github/workflows/build-and-test-linux.yml + strategy: + fail-fast: false + matrix: + include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }} + with: + cuda: ${{ matrix.cuda }} + host: ${{matrix.compiler.name}}${{matrix.compiler.version}} + cpu: ${{ matrix.cpu }} + test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}} ${{matrix.extra_build_args}} + build_script: "./ci/build_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} ${{matrix.extra_build_args}}" + test_script: "./ci/test_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} ${{matrix.extra_build_args}}" + container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} + + build_and_test_windows: + name: build and test windows + permissions: + id-token: write + contents: read + if: ${{ inputs.is_windows }} + uses: ./.github/workflows/build-and-test-windows.yml + strategy: + fail-fast: false + matrix: + include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }} + with: + test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}} + build_script: "./ci/windows/build_${{ inputs.project_name }}.ps1 ${{matrix.extra_build_args}}" + test_script: "./ci/windows/test_${{ inputs.project_name }}.ps1 ${{matrix.extra_build_args}}" + container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cuda${{matrix.cuda}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-${{matrix.os}} diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml new file mode 100644 index 00000000..6d6708c3 --- /dev/null +++ b/.github/workflows/pr.yml @@ -0,0 +1,95 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This is the main workflow that runs on every PR and push to main +name: pr + +defaults: + run: + shell: bash -euo pipefail {0} + +on: + push: + branches: + - "pull-request/[0-9]+" + +# Only runs one instance of this workflow at a time for a given PR and cancels any in-progress runs when a new one starts. +concurrency: + group: ${{ github.workflow }}-on-${{ github.event_name }}-from-${{ github.ref_name }} + cancel-in-progress: true + +permissions: + contents: read + pull-requests: read + +jobs: + compute-matrix: + name: Compute matrix + runs-on: ubuntu-latest + outputs: + DEVCONTAINER_VERSION: ${{steps.set-outputs.outputs.DEVCONTAINER_VERSION}} + PER_CUDA_COMPILER_MATRIX: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_MATRIX}} + PER_CUDA_COMPILER_KEYS: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_KEYS}} + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Compute matrix outputs + id: set-outputs + run: | + .github/actions/compute-matrix/compute-matrix.sh ci/matrix.yaml pull_request + + nvbench: + name: NVBench CUDA${{ matrix.cuda_host_combination }} + permissions: + id-token: write + contents: read + needs: compute-matrix + uses: ./.github/workflows/dispatch-build-and-test.yml + strategy: + fail-fast: false + matrix: + cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} + with: + project_name: "nvbench" + per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} + devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} + is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} + + verify-devcontainers: + name: Verify Dev Containers + permissions: + id-token: write + contents: read + uses: ./.github/workflows/verify-devcontainers.yml + + # This job is the final job that runs after all other jobs and is used for branch protection status checks. + # See: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks + # https://github.com/orgs/community/discussions/26822#discussioncomment-5122101 + ci: + runs-on: ubuntu-latest + name: CI + if: ${{ always() }} # need to use always() instead of !cancelled() because skipped jobs count as success + needs: + - nvbench + - verify-devcontainers + steps: + - name: Check status of all precursor jobs + if: >- + ${{ + contains(needs.*.result, 'failure') + || contains(needs.*.result, 'cancelled') + || contains(needs.*.result, 'skipped') + }} + run: exit 1 diff --git a/.github/workflows/run-as-coder.yml b/.github/workflows/run-as-coder.yml new file mode 100644 index 00000000..c63f0256 --- /dev/null +++ b/.github/workflows/run-as-coder.yml @@ -0,0 +1,156 @@ +name: Run as coder user + +defaults: + run: + shell: bash -exo pipefail {0} + +on: + workflow_call: + inputs: + cuda: {type: string, required: true} + host: {type: string, required: true} + name: {type: string, required: true} + image: {type: string, required: true} + runner: {type: string, required: true} + command: {type: string, required: true} + env: { type: string, required: false, default: "" } + +permissions: + contents: read + +jobs: + run-as-coder: + name: ${{inputs.name}} + permissions: + id-token: write + contents: read + runs-on: ${{inputs.runner}} + container: + # This job now uses a docker-outside-of-docker (DOOD) strategy. + # + # The GitHub Actions runner application mounts the host's docker socket `/var/run/docker.sock` into the + # container. By using a container with the `docker` CLI, this container can launch docker containers + # using the host's docker daemon. + # + # This allows us to run actions that require node v20 in the `cruizba/ubuntu-dind:jammy-26.1.3` container, and + # then launch our Ubuntu18.04-based GCC 6/7 containers to build and test CCCL. + # + # The main inconvenience to this approach is that any container mounts have to match the paths of the runner host, + # not the paths as seen in the intermediate (`cruizba/ubuntu-dind`) container. + # + # Note: I am using `cruizba/ubuntu-dind:jammy-26.1.3` instead of `docker:latest`, because GitHub doesn't support + # JS actions in alpine aarch64 containers, instead failing actions with this error: + # ``` + # Error: JavaScript Actions in Alpine containers are only supported on x64 Linux runners. Detected Linux Arm64 + # ``` + image: cruizba/ubuntu-dind:jammy-26.1.3 + env: + NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} + steps: + - name: Checkout repo + uses: actions/checkout@v4 + with: + path: nvbench + persist-credentials: false + - name: Add NVCC problem matcher + run: | + echo "::add-matcher::nvbench/.github/problem-matchers/problem-matcher.json" + - name: Configure credentials and environment variables for sccache + uses: ./nvbench/.github/actions/configure_cccl_sccache + - name: Run command + env: + CI: true + RUNNER: "${{inputs.runner}}" + COMMAND: "${{inputs.command}}" + AWS_ACCESS_KEY_ID: "${{env.AWS_ACCESS_KEY_ID}}" + AWS_SESSION_TOKEN: "${{env.AWS_SESSION_TOKEN}}" + AWS_SECRET_ACCESS_KEY: "${{env.AWS_SECRET_ACCESS_KEY}}" + run: | + echo "[host] github.workspace: ${{github.workspace}}" + echo "[container] GITHUB_WORKSPACE: ${GITHUB_WORKSPACE:-}" + echo "[container] PWD: $(pwd)" + + # Necessary because we're doing docker-outside-of-docker: + # Make a symlink in the container that matches the host's ${{github.workspace}}, so that way `$(pwd)` + # in `.devcontainer/launch.sh` constructs volume paths relative to the hosts's ${{github.workspace}}. + mkdir -p "$(dirname "${{github.workspace}}")" + ln -s "$(pwd)" "${{github.workspace}}" + + cd "${{github.workspace}}" + + cat <<"EOF" > ci.sh + + #! /usr/bin/env bash + set -eo pipefail + echo -e "\e[1;34mRunning as '$(whoami)' user in $(pwd):\e[0m" + echo -e "\e[1;34m${{inputs.command}}\e[0m" + eval "${{inputs.command}}" || exit_code=$? + if [ ! -z "$exit_code" ]; then + echo -e "::group::️❗ \e[1;31mInstructions to Reproduce CI Failure Locally\e[0m" + echo "::error:: To replicate this failure locally, follow the steps below:" + echo "1. Clone the repository, and navigate to the correct branch and commit:" + echo " git clone --branch $GITHUB_REF_NAME --single-branch https://github.com/$GITHUB_REPOSITORY.git && cd $(echo $GITHUB_REPOSITORY | cut -d'/' -f2) && git checkout $GITHUB_SHA" + echo "" + echo "2. Run the failed command inside the same Docker container used by the CI:" + echo " docker run --rm -it --gpus all --pull=always --volume \$PWD:/repo --workdir /repo ${{ inputs.image }} ${{inputs.command}}" + echo "" + echo "For additional information, see:" + echo " - DevContainer Documentation: https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md" + echo " - Continuous Integration (CI) Overview: https://github.com/NVIDIA/cccl/blob/main/ci-overview.md" + exit $exit_code + fi + EOF + + chmod +x ci.sh + + mkdir "$RUNNER_TEMP/.aws"; + + cat < "$RUNNER_TEMP/.aws/config" + [default] + bucket=rapids-sccache-devs + region=us-east-2 + EOF + + cat < "$RUNNER_TEMP/.aws/credentials" + [default] + aws_access_key_id=$AWS_ACCESS_KEY_ID + aws_session_token=$AWS_SESSION_TOKEN + aws_secret_access_key=$AWS_SECRET_ACCESS_KEY + EOF + + chmod 0600 "$RUNNER_TEMP/.aws/credentials" + chmod 0664 "$RUNNER_TEMP/.aws/config" + + declare -a gpu_request=() + + # Explicitly pass which GPU to use if on a GPU runner + if [[ "${RUNNER}" = *"-gpu-"* ]]; then + gpu_request+=(--gpus "device=${NVIDIA_VISIBLE_DEVICES}") + fi + + host_path() { + sed "s@/__w@$(dirname "$(dirname "${{github.workspace}}")")@" <<< "$1" + } + + # Launch this container using the host's docker daemon + ${{github.event.repository.name}}/.devcontainer/launch.sh \ + --docker \ + --cuda ${{inputs.cuda}} \ + --host ${{inputs.host}} \ + "${gpu_request[@]}" \ + --env "CI=$CI" \ + --env "AWS_ROLE_ARN=" \ + --env "COMMAND=$COMMAND" \ + --env "GITHUB_ENV=$GITHUB_ENV" \ + --env "GITHUB_SHA=$GITHUB_SHA" \ + --env "GITHUB_PATH=$GITHUB_PATH" \ + --env "GITHUB_OUTPUT=$GITHUB_OUTPUT" \ + --env "GITHUB_ACTIONS=$GITHUB_ACTIONS" \ + --env "GITHUB_REF_NAME=$GITHUB_REF_NAME" \ + --env "GITHUB_WORKSPACE=$GITHUB_WORKSPACE" \ + --env "GITHUB_REPOSITORY=$GITHUB_REPOSITORY" \ + --env "GITHUB_STEP_SUMMARY=$GITHUB_STEP_SUMMARY" \ + --volume "${{github.workspace}}/ci.sh:/ci.sh" \ + --volume "$(host_path "$RUNNER_TEMP")/.aws:/root/.aws" \ + --volume "$(dirname "$(dirname "${{github.workspace}}")"):/__w" \ + -- /ci.sh diff --git a/.github/workflows/verify-devcontainers.yml b/.github/workflows/verify-devcontainers.yml new file mode 100644 index 00000000..4bbfa6b3 --- /dev/null +++ b/.github/workflows/verify-devcontainers.yml @@ -0,0 +1,86 @@ +name: Verify devcontainers + +on: + workflow_call: + +defaults: + run: + shell: bash -euo pipefail {0} + +permissions: + contents: read + +jobs: + verify-make-devcontainers: + name: Verify devcontainer files are up-to-date + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Setup jq and yq + run: | + sudo apt-get update + sudo apt-get install jq -y + sudo wget -O /usr/local/bin/yq https://github.com/mikefarah/yq/releases/download/v4.34.2/yq_linux_amd64 + sudo chmod +x /usr/local/bin/yq + - name: Run the script to generate devcontainer files + run: | + ./.devcontainer/make_devcontainers.sh --verbose + - name: Check for changes + run: | + if [[ $(git diff --stat) != '' || $(git status --porcelain | grep '^??') != '' ]]; then + git diff --minimal + git status --porcelain + echo "::error:: Dev Container files are out of date or there are untracked files. Run the .devcontainer/make_devcontainers.sh script and commit the changes." + exit 1 + else + echo "::note::Dev Container files are up-to-date." + fi + + get-devcontainer-list: + needs: verify-make-devcontainers + name: Get list of devcontainer.json files + runs-on: ubuntu-latest + outputs: + devcontainers: ${{ steps.get-list.outputs.devcontainers }} + steps: + - name: Check out the code + uses: actions/checkout@v4 + - name: Get list of devcontainer.json paths and names + id: get-list + run: | + devcontainers=$(find .devcontainer/ -name 'devcontainer.json' | while read -r devcontainer; do + jq --arg path "$devcontainer" '{path: $path, name: .name}' "$devcontainer" + done | jq -s -c .) + echo "devcontainers=${devcontainers}" | tee --append "${GITHUB_OUTPUT}" + + verify-devcontainers: + needs: get-devcontainer-list + name: ${{matrix.devcontainer.name}} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + devcontainer: ${{fromJson(needs.get-devcontainer-list.outputs.devcontainers)}} + permissions: + id-token: write + contents: read + steps: + - name: Check out the code + uses: actions/checkout@v4 + # We don't really need sccache configured, but we need the AWS credentials envvars to be set + # in order to avoid the devcontainer hanging waiting for GitHub authentication + - name: Configure credentials and environment variables for sccache + uses: ./.github/actions/configure_cccl_sccache + - name: Run in devcontainer + uses: devcontainers/ci@v0.3 + with: + push: never + configFile: ${{ matrix.devcontainer.path }} + env: | + SCCACHE_REGION=${{ env.SCCACHE_REGION }} + AWS_ACCESS_KEY_ID=${{ env.AWS_ACCESS_KEY_ID }} + AWS_SESSION_TOKEN=${{ env.AWS_SESSION_TOKEN }} + AWS_SECRET_ACCESS_KEY=${{ env.AWS_SECRET_ACCESS_KEY }} + runCmd: | + .devcontainer/verify_devcontainer.sh diff --git a/.gitignore b/.gitignore index 57309ab5..50fac98d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,10 @@ build*/ +.aws +.vscode +.cache +.config .idea cmake-build-* *~ +compile_commands.json +CMakeUserPresets.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 369d6311..8eb5f883 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,5 @@ -# 3.20.1 required for rapids-cmake -# 3.21.0 required for NVBench_ADD_DEPENDENT_DLLS_TO_* (MSVC only) -cmake_minimum_required(VERSION 3.20.1) +# 3.23.1 required for rapids-cmake +cmake_minimum_required(VERSION 3.23.1) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CUDA_STANDARD 17) @@ -22,6 +21,11 @@ project(NVBench nvbench_init_rapids_cmake() +# Define NVBench_DETECTED_${LANG}_STANDARDS +include(cmake/DetectSupportedStandards.cmake) +detect_supported_standards(NVBench CXX 17 20) +detect_supported_standards(NVBench CUDA 17 20) + # See NVIDIA/NVBench#52 find_package(CUDAToolkit REQUIRED) set(cupti_default ON) @@ -29,15 +33,22 @@ if (${CUDAToolkit_VERSION} VERSION_LESS 11.3) set(cupti_default OFF) endif() +option(BUILD_SHARED_LIBS "Build NVBench as a shared library" ON) + option(NVBench_ENABLE_NVML "Build with NVML support from the Cuda Toolkit." ON) option(NVBench_ENABLE_CUPTI "Build NVBench with CUPTI." ${cupti_default}) option(NVBench_ENABLE_TESTING "Build NVBench testing suite." OFF) +option(NVBench_ENABLE_HEADER_TESTING "Build NVBench testing suite." OFF) option(NVBench_ENABLE_DEVICE_TESTING "Include tests that require a GPU (with locked clocks)." OFF ) option(NVBench_ENABLE_EXAMPLES "Build NVBench examples." OFF) +option(NVBench_ENABLE_INSTALL_RULES "Install NVBench." ${NVBench_TOPLEVEL_PROJECT}) + +include(cmake/NVBenchUtilities.cmake) # Must be first +include(cmake/NVBenchClangdCompileInfo.cmake) # Must be before any targets are created include(cmake/NVBenchConfigTarget.cmake) include(cmake/NVBenchDependentDlls.cmake) @@ -45,13 +56,15 @@ include(cmake/NVBenchExports.cmake) include(cmake/NVBenchWriteConfigHeader.cmake) include(cmake/NVBenchDependencies.cmake) include(cmake/NVBenchInstallRules.cmake) -include(cmake/NVBenchUtilities.cmake) message(STATUS "NVBench CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}") add_subdirectory(nvbench) -if (NVBench_ENABLE_EXAMPLES OR NVBench_ENABLE_TESTING) +if (NVBench_ENABLE_EXAMPLES OR + NVBench_ENABLE_TESTING OR + NVBench_ENABLE_HEADER_TESTING) + include(CTest) enable_testing() endif() @@ -65,4 +78,8 @@ if (NVBench_ENABLE_TESTING) add_subdirectory(testing) endif() +if (NVBench_ENABLE_HEADER_TESTING) + include(cmake/NVBenchHeaderTesting.cmake) +endif() + nvbench_generate_exports() diff --git a/CMakePresets.json b/CMakePresets.json new file mode 100644 index 00000000..3e66f9ad --- /dev/null +++ b/CMakePresets.json @@ -0,0 +1,74 @@ +{ + "version": 3, + "cmakeMinimumRequired": { + "major": 3, + "minor": 23, + "patch": 1 + }, + "configurePresets": [ + { + "name": "base", + "hidden": true, + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/$env{CCCL_BUILD_INFIX}/${presetName}", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Release", + "CMAKE_CUDA_ARCHITECTURES": "all-major", + "NVBench_ENABLE_CUPTI": true, + "NVBench_ENABLE_DEVICE_TESTING": false, + "NVBench_ENABLE_EXAMPLES": true, + "NVBench_ENABLE_HEADER_TESTING": true, + "NVBench_ENABLE_INSTALL_RULES": true, + "NVBench_ENABLE_NVML": true, + "NVBench_ENABLE_TESTING": true, + "NVBench_ENABLE_WERROR": true + } + }, + { + "name": "nvbench-dev", + "displayName": "Developer Build", + "inherits": "base", + "cacheVariables": { + "NVBench_ENABLE_DEVICE_TESTING": true + } + }, + { + "name": "nvbench-ci", + "displayName": "NVBench CI", + "inherits": "base" + } + ], + "buildPresets": [ + { + "name": "nvbench-dev", + "configurePreset": "nvbench-dev" + }, + { + "name": "nvbench-ci", + "configurePreset": "nvbench-ci" + } + ], + "testPresets": [ + { + "name": "base", + "hidden": true, + "output": { + "outputOnFailure": true + }, + "execution": { + "noTestsAction": "error", + "stopOnFailure": false + } + }, + { + "name": "nvbench-dev", + "configurePreset": "nvbench-dev", + "inherits": "base" + }, + { + "name": "nvbench-ci", + "configurePreset": "nvbench-ci", + "inherits": "base" + } + ] +} diff --git a/README.md b/README.md index c1cad5ad..285213f1 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,15 @@ features: * Executes the benchmark multiple times back-to-back and records total time. * Reports the average execution time (total time / number of executions). +# Supported Compilers and Tools + +- CMake > 2.23.1 +- CUDA Toolkit + nvcc: 11.1 -> 12.4 +- g++: 7 -> 12 +- clang++: 9 -> 18 +- cl.exe: 2019 -> 2022 (19.29, 29.39) +- Headers are tested with C++17 -> C++20. + # Getting Started ## Minimal Benchmark @@ -34,7 +43,7 @@ A basic kernel benchmark can be created with just a few lines of CUDA C++: ```cpp void my_benchmark(nvbench::state& state) { - state.exec([](nvbench::launch& launch) { + state.exec([](nvbench::launch& launch) { my_kernel<<>>(); }); } @@ -72,7 +81,7 @@ mkdir -p build cd build cmake -DNVBench_ENABLE_EXAMPLES=ON -DCMAKE_CUDA_ARCHITECTURES=70 .. && make ``` -Be sure to set `CMAKE_CUDA_ARCHITECTURE` based on the GPU you are running on. +Be sure to set `CMAKE_CUDA_ARCHITECTURE` based on the GPU you are running on. Examples are built by default into `build/bin` and are prefixed with `nvbench.example`. @@ -119,7 +128,7 @@ Pass: Batch: 0.261963ms GPU, 7.18s total GPU, 27394x ## Demo Project To get started using NVBench with your own kernels, consider trying out -the [NVBench Demo Project](https://github.com/allisonvacanti/nvbench_demo). +the [NVBench Demo Project](https://github.com/allisonvacanti/nvbench_demo). `nvbench_demo` provides a simple CMake project that uses NVBench to build an example benchmark. It's a great way to experiment with the library without a lot @@ -129,7 +138,7 @@ of investment. Contributions are welcome! -For current issues, see the [issue board](https://github.com/NVIDIA/nvbench/issues). Issues labeled with [![](https://img.shields.io/github/labels/NVIDIA/nvbench/good%20first%20issue)](https://github.com/NVIDIA/nvbench/labels/good%20first%20issue) are good for first time contributors. +For current issues, see the [issue board](https://github.com/NVIDIA/nvbench/issues). Issues labeled with [![](https://img.shields.io/github/labels/NVIDIA/nvbench/good%20first%20issue)](https://github.com/NVIDIA/nvbench/labels/good%20first%20issue) are good for first time contributors. ## Tests @@ -146,7 +155,7 @@ To run all tests: ``` make test ``` -or +or ``` ctest ``` diff --git a/ci/axis/cpu.yml b/ci/axis/cpu.yml deleted file mode 100644 index 7230b666..00000000 --- a/ci/axis/cpu.yml +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2018-2020 NVIDIA Corporation -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# Released under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. - -SDK_TYPE: - - cuda - -SDK_VER: - - 11.5.1-devel - -OS_TYPE: - - ubuntu - -OS_VER: - - 20.04 - -CXX_TYPE: - - clang - - gcc - -CXX_VER: - - 5 - - 6 - - 7 - - 8 - - 9 - - 10 - - 11 - - 12 - -exclude: - - CXX_TYPE: clang - CXX_VER: 5 - - CXX_TYPE: clang - CXX_VER: 6 - - CXX_TYPE: gcc - CXX_VER: 12 diff --git a/ci/axis/gpu.yml b/ci/axis/gpu.yml deleted file mode 100644 index 15310794..00000000 --- a/ci/axis/gpu.yml +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) 2018-2020 NVIDIA Corporation -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# Released under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. - -SDK_TYPE: - - cuda - -SDK_VER: - - 11.5.1-devel - -OS_TYPE: - - ubuntu - -OS_VER: - - 20.04 - -CXX_TYPE: - - clang - - gcc - -CXX_VER: - - 11 - - 12 - -exclude: - - CXX_TYPE: clang - CXX_VER: 11 - - CXX_TYPE: gcc - CXX_VER: 12 diff --git a/ci/build_common.sh b/ci/build_common.sh new file mode 100755 index 00000000..2c30414a --- /dev/null +++ b/ci/build_common.sh @@ -0,0 +1,246 @@ +#!/bin/bash + +set -eo pipefail + +# Ensure the script is being executed in its containing directory +cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"; + +# Script defaults +HOST_COMPILER=${CXX:-g++} # $CXX if set, otherwise `g++` +CXX_STANDARD=17 +CUDA_COMPILER=${CUDACXX:-nvcc} # $CUDACXX if set, otherwise `nvcc` +CUDA_ARCHS= # Empty, use presets by default. +GLOBAL_CMAKE_OPTIONS=() +DISABLE_CUB_BENCHMARKS= # Enable to force-disable building CUB benchmarks. + +# Check if the correct number of arguments has been provided +function usage { + echo "Usage: $0 [OPTIONS]" + echo + echo "The PARALLEL_LEVEL environment variable controls the amount of build parallelism. Default is the number of cores." + echo + echo "Options:" + echo " -v/--verbose: enable shell echo for debugging" + echo " -cuda: CUDA compiler (Defaults to \$CUDACXX if set, otherwise nvcc)" + echo " -cxx: Host compiler (Defaults to \$CXX if set, otherwise g++)" + echo " -std: CUDA/C++ standard (Defaults to 17)" + echo " -arch: Target CUDA arches, e.g. \"60-real;70;80-virtual\" (Defaults to value in presets file)" + echo " -cmake-options: Additional options to pass to CMake" + echo + echo "Examples:" + echo " $ PARALLEL_LEVEL=8 $0" + echo " $ PARALLEL_LEVEL=8 $0 -cxx g++-9" + echo " $ $0 -cxx clang++-8" + echo " $ $0 -cxx g++-8 -std 20 -arch 80-real -v -cuda /usr/local/bin/nvcc" + echo " $ $0 -cmake-options \"-DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS=-Wfatal-errors\"" + exit 1 +} + +# Parse options + +# Copy the args into a temporary array, since we will modify them and +# the parent script may still need them. +args=("$@") +while [ "${#args[@]}" -ne 0 ]; do + case "${args[0]}" in + -v | --verbose) VERBOSE=1; args=("${args[@]:1}");; + -cxx) HOST_COMPILER="${args[1]}"; args=("${args[@]:2}");; + -std) CXX_STANDARD="${args[1]}"; args=("${args[@]:2}");; + -cuda) CUDA_COMPILER="${args[1]}"; args=("${args[@]:2}");; + -arch) CUDA_ARCHS="${args[1]}"; args=("${args[@]:2}");; + -disable-benchmarks) DISABLE_CUB_BENCHMARKS=1; args=("${args[@]:1}");; + -cmake-options) + if [ -n "${args[1]}" ]; then + IFS=' ' read -ra split_args <<< "${args[1]}" + GLOBAL_CMAKE_OPTIONS+=("${split_args[@]}") + args=("${args[@]:2}") + else + echo "Error: No arguments provided for -cmake-options" + usage + exit 1 + fi + ;; + -h | -help | --help) usage ;; + *) echo "Unrecognized option: ${args[0]}"; usage ;; + esac +done + +# Convert to full paths: +HOST_COMPILER=$(which ${HOST_COMPILER}) +CUDA_COMPILER=$(which ${CUDA_COMPILER}) + +if [[ -n "${CUDA_ARCHS}" ]]; then + GLOBAL_CMAKE_OPTIONS+=("-DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHS}") +fi + +if [ $VERBOSE ]; then + set -x +fi + +# Begin processing unsets after option parsing +set -u + +readonly PARALLEL_LEVEL=${PARALLEL_LEVEL:=$(nproc)} + +if [ -z ${CCCL_BUILD_INFIX+x} ]; then + CCCL_BUILD_INFIX="" +fi + +# Presets will be configured in this directory: +BUILD_DIR="../build/${CCCL_BUILD_INFIX}" + +# The most recent build will always be symlinked to cccl/build/latest +mkdir -p $BUILD_DIR +rm -f ../build/latest +ln -sf $BUILD_DIR ../build/latest + +# Now that BUILD_DIR exists, use readlink to canonicalize the path: +BUILD_DIR=$(readlink -f "${BUILD_DIR}") + +# Prepare environment for CMake: +export CMAKE_BUILD_PARALLEL_LEVEL="${PARALLEL_LEVEL}" +export CTEST_PARALLEL_LEVEL="1" +export CXX="${HOST_COMPILER}" +export CUDACXX="${CUDA_COMPILER}" +export CUDAHOSTCXX="${HOST_COMPILER}" +export CXX_STANDARD + +source ./pretty_printing.sh + +print_environment_details() { + begin_group "⚙️ Environment Details" + + echo "pwd=$(pwd)" + + print_var_values \ + BUILD_DIR \ + CXX_STANDARD \ + CXX \ + CUDACXX \ + CUDAHOSTCXX \ + NVCC_VERSION \ + CMAKE_BUILD_PARALLEL_LEVEL \ + CTEST_PARALLEL_LEVEL \ + CCCL_BUILD_INFIX \ + GLOBAL_CMAKE_OPTIONS + + echo "Current commit is:" + git log -1 || echo "Not a repository" + + if command -v nvidia-smi &> /dev/null; then + nvidia-smi + else + echo "nvidia-smi not found" + fi + + end_group "⚙️ Environment Details" +} + +fail_if_no_gpu() { + if ! nvidia-smi &> /dev/null; then + echo "Error: No NVIDIA GPU detected. Please ensure you have an NVIDIA GPU installed and the drivers are properly configured." >&2 + exit 1 + fi +} + +function print_test_time_summary() +{ + ctest_log=${1} + + if [ -f ${ctest_log} ]; then + begin_group "⏱️ Longest Test Steps" + # Only print the full output in CI: + if [ -n "${GITHUB_ACTIONS:-}" ]; then + cmake -DLOGFILE=${ctest_log} -P ../cmake/PrintCTestRunTimes.cmake + else + cmake -DLOGFILE=${ctest_log} -P ../cmake/PrintCTestRunTimes.cmake | head -n 15 + fi + end_group "⏱️ Longest Test Steps" + fi +} + +function configure_preset() +{ + local BUILD_NAME=$1 + local PRESET=$2 + local CMAKE_OPTIONS=$3 + local GROUP_NAME="🛠️ CMake Configure ${BUILD_NAME}" + + pushd .. > /dev/null + run_command "$GROUP_NAME" cmake --preset=$PRESET --log-level=VERBOSE "${GLOBAL_CMAKE_OPTIONS[@]}" $CMAKE_OPTIONS + status=$? + popd > /dev/null + return $status +} + +function build_preset() { + local BUILD_NAME=$1 + local PRESET=$2 + local green="1;32" + local red="1;31" + local GROUP_NAME="🏗️ Build ${BUILD_NAME}" + + source "./sccache_stats.sh" "start" + + pushd .. > /dev/null + run_command "$GROUP_NAME" cmake --build --preset=$PRESET -v + status=$? + popd > /dev/null + + minimal_sccache_stats=$(source "./sccache_stats.sh" "end") + + # Only print detailed stats in actions workflow + if [ -n "${GITHUB_ACTIONS:-}" ]; then + begin_group "💲 sccache stats" + echo "${minimal_sccache_stats}" + sccache -s + end_group + + begin_group "🥷 ninja build times" + echo "The "weighted" time is the elapsed time of each build step divided by the number + of tasks that were running in parallel. This makes it an excellent approximation + of how "important" a slow step was. A link that is entirely or mostly serialized + will have a weighted time that is the same or similar to its elapsed time. A + compile that runs in parallel with 999 other compiles will have a weighted time + that is tiny." + ./ninja_summary.py -C ${BUILD_DIR}/${PRESET} || echo "ninja_summary.py failed" + end_group + else + echo $minimal_sccache_stats + fi + + return $status +} + +function test_preset() +{ + local BUILD_NAME=$1 + local PRESET=$2 + local GROUP_NAME="🚀 Test ${BUILD_NAME}" + + fail_if_no_gpu + + + ctest_log_dir="${BUILD_DIR}/log/ctest" + ctest_log="${ctest_log_dir}/${PRESET}" + mkdir -p "${ctest_log_dir}" + + pushd .. > /dev/null + run_command "$GROUP_NAME" ctest --output-log "${ctest_log}" --preset=$PRESET + status=$? + popd > /dev/null + + print_test_time_summary ${ctest_log} + + return $status +} + +function configure_and_build_preset() +{ + local BUILD_NAME=$1 + local PRESET=$2 + local CMAKE_OPTIONS=$3 + + configure_preset "$BUILD_NAME" "$PRESET" "$CMAKE_OPTIONS" + build_preset "$BUILD_NAME" "$PRESET" +} diff --git a/ci/build_nvbench.sh b/ci/build_nvbench.sh new file mode 100755 index 00000000..e9ba372e --- /dev/null +++ b/ci/build_nvbench.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +source "$(dirname "$0")/build_common.sh" + +print_environment_details + +PRESET="nvbench-ci" + +CMAKE_OPTIONS="" + +function version_lt() { + local lhs="${1//v/}" + local rhs="${2//v/}" + # If the versions are equal, return false + [ "$lhs" = "$rhs" ] && return 1 + # If the left-hand side is less than the right-hand side, return true + [ "$lhs" = `echo -e "$lhs\n$rhs" | sort -V | head -n1` ] +} + +# If CUDA_COMPILER is nvcc and the version < 11.3, disable CUPTI +if [[ "$CUDA_COMPILER" == *"nvcc"* ]]; then + CUDA_VERSION=$(nvcc --version | grep release | sed -r 's/.*release ([0-9.]+).*/\1/') + if version_lt "$CUDA_VERSION" "11.3"; then + CMAKE_OPTIONS+=" -DNVBench_ENABLE_CUPTI=OFF " + fi +fi + +configure_and_build_preset "NVBench" "$PRESET" "$CMAKE_OPTIONS" + +print_time_summary diff --git a/ci/common/build.bash b/ci/common/build.bash deleted file mode 100755 index 61b3654c..00000000 --- a/ci/common/build.bash +++ /dev/null @@ -1,231 +0,0 @@ -#! /usr/bin/env bash - -# Copyright (c) 2018-2020 NVIDIA Corporation -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# Released under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. - -################################################################################ -# NVBench build script for gpuCI -################################################################################ - -set -e - -# append variable value -# Appends ${value} to ${variable}, adding a space before ${value} if -# ${variable} is not empty. -function append { - tmp="${!1:+${!1} }${2}" - eval "${1}=\${tmp}" -} - -# log args... -# Prints out ${args[*]} with a gpuCI log prefix and a newline before and after. -function log() { - printf "\n>>>> %s\n\n" "${*}" -} - -# print_with_trailing_blank_line args... -# Prints ${args[*]} with one blank line following, preserving newlines within -# ${args[*]} but stripping any preceding ${args[*]}. -function print_with_trailing_blank_line { - printf "%s\n\n" "${*}" -} - -# echo_and_run name args... -# Echo ${args[@]}, then execute ${args[@]} -function echo_and_run { - echo "${1}: ${@:2}" - ${@:2} -} - -# echo_and_run_timed name args... -# Echo ${args[@]}, then execute ${args[@]} and report how long it took, -# including ${name} in the output of the time. -function echo_and_run_timed { - echo "${@:2}" - TIMEFORMAT=$'\n'"${1} Time: %lR" - time ${@:2} -} - -# join_delimit [value [value [...]]] -# Combine all values into a single string, separating each by a single character -# delimiter. Eg: -# foo=(bar baz kramble) -# joined_foo=$(join_delimit "|" "${foo[@]}") -# echo joined_foo # "bar|baz|kramble" -function join_delimit { - local IFS="${1}" - shift - echo "${*}" -} - -################################################################################ -# VARIABLES - Set up bash and environmental variables. -################################################################################ - -# Get the variables the Docker container set up for us: ${CXX}, ${CUDACXX}, etc. -source /etc/cccl.bashrc - -# Set path. -export PATH=/usr/local/cuda/bin:${PATH} - -# Set home to the job's workspace. -export HOME=${WORKSPACE} - -# Switch to the build directory. -cd ${WORKSPACE} -mkdir -p build -cd build - -# Remove any old .ninja_log file so the PrintNinjaBuildTimes step is accurate: -rm -f .ninja_log - -if [[ -z "${CMAKE_BUILD_TYPE}" ]]; then - CMAKE_BUILD_TYPE="Release" -fi - -CMAKE_BUILD_FLAGS="--" - -# The Docker image sets up `${CXX}` and `${CUDACXX}`. -append CMAKE_FLAGS "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}" -append CMAKE_FLAGS "-DCMAKE_CUDA_COMPILER='${CUDACXX}'" - -if [[ "${CXX_TYPE}" == "nvcxx" ]]; then - echo "nvc++ not supported." - exit 1 -else - if [[ "${CXX_TYPE}" == "icc" ]]; then - echo "icc not supported." - exit 1 - fi - # We're using NVCC so we need to set the host compiler. - append CMAKE_FLAGS "-DCMAKE_CXX_COMPILER='${CXX}'" - append CMAKE_FLAGS "-DCMAKE_CUDA_HOST_COMPILER='${CXX}'" - append CMAKE_FLAGS "-G Ninja" - # Don't stop on build failures. - append CMAKE_BUILD_FLAGS "-k0" -fi - -if [[ -n "${PARALLEL_LEVEL}" ]]; then - DETERMINE_PARALLELISM_FLAGS="-j ${PARALLEL_LEVEL}" -fi - -WSL=0 -if [[ $(grep -i microsoft /proc/version) ]]; then - echo "Windows Subsystem for Linux detected." - WSL=1 -fi -export WSL - -#append CMAKE_FLAGS "-DCMAKE_CUDA_ARCHITECTURES=all" - -append CMAKE_FLAGS "-DNVBench_ENABLE_EXAMPLES=ON" -append CMAKE_FLAGS "-DNVBench_ENABLE_TESTING=ON" -append CMAKE_FLAGS "-DNVBench_ENABLE_CUPTI=ON" -append CMAKE_FLAGS "-DNVBench_ENABLE_WERROR=ON" - -# These consume a lot of time and don't currently have -# any value as regression tests. -append CMAKE_FLAGS "-DNVBench_ENABLE_DEVICE_TESTING=OFF" - -# NVML doesn't work under WSL -if [[ ${WSL} -eq 0 ]]; then - append CMAKE_FLAGS "-DNVBench_ENABLE_NVML=ON" -else - append CMAKE_FLAGS "-DNVBench_ENABLE_NVML=OFF" -fi - -if [[ -n "${@}" ]]; then - append CMAKE_BUILD_FLAGS "${@}" -fi - -append CTEST_FLAGS "--output-on-failure" - -# Export variables so they'll show up in the logs when we report the environment. -export CMAKE_FLAGS -export CMAKE_BUILD_FLAGS -export CTEST_FLAGS - -################################################################################ -# ENVIRONMENT - Configure and print out information about the environment. -################################################################################ - -log "Determine system topology..." - -# Set `${PARALLEL_LEVEL}` if it is unset; otherwise, this just reports the -# system topology. -source ${WORKSPACE}/ci/common/determine_build_parallelism.bash ${DETERMINE_PARALLELISM_FLAGS} - -log "Get environment..." - -env | sort - -log "Check versions..." - -# We use sed and echo below to ensure there is always one and only trailing -# line following the output from each tool. - -${CXX} --version 2>&1 | sed -Ez '$ s/\n*$/\n/' - -echo - -${CUDACXX} --version 2>&1 | sed -Ez '$ s/\n*$/\n/' - -echo - -cmake --version 2>&1 | sed -Ez '$ s/\n*$/\n/' - -echo - -if [[ "${BUILD_TYPE}" == "gpu" ]]; then - nvidia-smi 2>&1 | sed -Ez '$ s/\n*$/\n/' -fi - -################################################################################ -# BUILD -################################################################################ - -log "Configure..." - -echo_and_run_timed "Configure" cmake .. --log-level=VERBOSE ${CMAKE_FLAGS} -configure_status=$? - -log "Build..." - -# ${PARALLEL_LEVEL} needs to be passed after we run -# determine_build_parallelism.bash, so it can't be part of ${CMAKE_BUILD_FLAGS}. -set +e # Don't stop on build failures. -echo_and_run_timed "Build" cmake --build . ${CMAKE_BUILD_FLAGS} -j ${PARALLEL_LEVEL} -build_status=$? -set -e - -################################################################################ -# TEST - Run examples and tests. -################################################################################ - -log "Test..." - -( - # Make sure test_status captures ctest, not tee: - # https://stackoverflow.com/a/999259/11130318 - set -o pipefail - echo_and_run_timed "Test" ctest ${CTEST_FLAGS} -j ${PARALLEL_LEVEL} | tee ctest_log -) - -test_status=$? - -################################################################################ -# SUMMARY - Print status of each step and exit with failure if needed. -################################################################################ - -log "Summary:" -echo "- Configure Error Code: ${configure_status}" -echo "- Build Error Code: ${build_status}" -echo "- Test Error Code: ${test_status}" - -if [[ "${configure_status}" != "0" ]] || \ - [[ "${build_status}" != "0" ]] || \ - [[ "${test_status}" != "0" ]]; then - exit 1 -fi diff --git a/ci/common/determine_build_parallelism.bash b/ci/common/determine_build_parallelism.bash deleted file mode 100755 index 1a1cf4c7..00000000 --- a/ci/common/determine_build_parallelism.bash +++ /dev/null @@ -1,119 +0,0 @@ -#! /usr/bin/env bash - -# Copyright (c) 2018-2020 NVIDIA Corporation -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# Released under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. - -function usage { - echo "Usage: ${0} [flags...]" - echo - echo "Examine the system topology to determine a reasonable amount of build" - echo "parallelism." - echo - echo "Exported variables:" - echo " \${LOGICAL_CPUS} : Logical processors (e.g. threads)." - echo " \${PHYSICAL_CPUS} : Physical processors (e.g. cores)." - echo " \${TOTAL_MEM} : Total system memory [GB]." - echo " \${MAX_THREADS_PER_CORE} : Maximum threads per core allowed." - echo " \${MIN_MEMORY_PER_THREAD} : Minimum memory [GB] per thread allowed." - echo " \${CPU_BOUND_THREADS} : # of build threads constrained by processors." - echo " \${MEM_BOUND_THREADS} : # of build threads constrained by memory [GB]." - echo " \${PARALLEL_LEVEL} : Determined # of build threads." - echo " \${MEM_PER_THREAD} : Memory [GB] per build thread." - echo - echo "-h, -help, --help" - echo " Print this message." - echo - echo "-q, --quiet" - echo " Print nothing and only export variables." - echo - echo "-j , --jobs " - echo " Explicitly set the number of build threads to use." - echo - echo "--max-threads-per-core " - echo " Specify the maximum threads per core allowed (default: ${MAX_THREADS_PER_CORE} [threads/core])." - echo - echo "--min-memory-per-thread " - echo " Specify the minimum memory per thread allowed (default: ${MIN_MEMORY_PER_THREAD} [GBs/thread])." - - exit -3 -} - -QUIET=0 - -export MAX_THREADS_PER_CORE=2 -export MIN_MEMORY_PER_THREAD=1 # [GB] - -while test ${#} != 0 -do - case "${1}" in - -h) ;& - -help) ;& - --help) usage ;; - -q) ;& - --quiet) QUIET=1 ;; - -j) ;& - --jobs) - shift # The next argument is the number of threads. - PARALLEL_LEVEL="${1}" - ;; - --max-threads-per-core) - shift # The next argument is the number of threads per core. - MAX_THREADS_PER_CORE="${1}" - ;; - --min-memory-per-thread) - shift # The next argument is the amount of memory per thread. - MIN_MEMORY_PER_THREAD="${1}" - ;; - esac - shift -done - -# https://stackoverflow.com/a/23378780 -if [ $(uname) == "Darwin" ]; then - export LOGICAL_CPUS=$(sysctl -n hw.logicalcpu_max) - export PHYSICAL_CPUS=$(sysctl -n hw.physicalcpu_max) -else - export LOGICAL_CPUS=$(lscpu -p | egrep -v '^#' | wc -l) - export PHYSICAL_CPUS=$(lscpu -p | egrep -v '^#' | sort -u -t, -k 2,4 | wc -l) -fi - -export TOTAL_MEM=$(awk "BEGIN { printf \"%0.4g\", $(grep MemTotal /proc/meminfo | awk '{ print $2 }') / (1024 * 1024) }") - -export CPU_BOUND_THREADS=$(awk "BEGIN { printf \"%.04g\", int(${PHYSICAL_CPUS} * ${MAX_THREADS_PER_CORE}) }") -export MEM_BOUND_THREADS=$(awk "BEGIN { printf \"%.04g\", int(${TOTAL_MEM} / ${MIN_MEMORY_PER_THREAD}) }") - -if [[ -z "${PARALLEL_LEVEL}" ]]; then - # Pick the smaller of the two as the default. - if [[ "${MEM_BOUND_THREADS}" -lt "${CPU_BOUND_THREADS}" ]]; then - export PARALLEL_LEVEL=${MEM_BOUND_THREADS} - else - export PARALLEL_LEVEL=${CPU_BOUND_THREADS} - fi -else - EXPLICIT_PARALLEL_LEVEL=1 -fi - -# This can be a floating point number. -export MEM_PER_THREAD=$(awk "BEGIN { printf \"%.04g\", ${TOTAL_MEM} / ${PARALLEL_LEVEL} }") - -if [[ "${QUIET}" == 0 ]]; then - echo "Logical CPUs: ${LOGICAL_CPUS} [threads]" - echo "Physical CPUs: ${PHYSICAL_CPUS} [cores]" - echo "Total Mem: ${TOTAL_MEM} [GBs]" - echo "Max Threads Per Core: ${MAX_THREADS_PER_CORE} [threads/core]" - echo "Min Memory Per Threads: ${MIN_MEMORY_PER_THREAD} [GBs/thread]" - echo "CPU Bound Threads: ${CPU_BOUND_THREADS} [threads]" - echo "Mem Bound Threads: ${MEM_BOUND_THREADS} [threads]" - - echo -n "Parallel Level: ${PARALLEL_LEVEL} [threads]" - if [[ -n "${EXPLICIT_PARALLEL_LEVEL}" ]]; then - echo " (explicitly set)" - else - echo - fi - - echo "Mem Per Thread: ${MEM_PER_THREAD} [GBs/thread]" -fi - diff --git a/ci/cpu/build.bash b/ci/cpu/build.bash deleted file mode 100755 index edf1ba31..00000000 --- a/ci/cpu/build.bash +++ /dev/null @@ -1,14 +0,0 @@ -#! /usr/bin/env bash - -# Copyright (c) 2018-2020 NVIDIA Corporation -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# Released under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. - -################################################################################ -# NVBench build script for gpuCI (CPU-only) -################################################################################ - -export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} - -source ${WORKSPACE}/ci/common/build.bash diff --git a/ci/gpu/build.bash b/ci/gpu/build.bash deleted file mode 100755 index 9f6fc01f..00000000 --- a/ci/gpu/build.bash +++ /dev/null @@ -1,14 +0,0 @@ -#! /usr/bin/env bash - -# Copyright (c) 2018-2020 NVIDIA Corporation -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# Released under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. - -################################################################################ -# NVBench build script for gpuCI (heterogeneous) -################################################################################ - -export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} - -source ${WORKSPACE}/ci/common/build.bash diff --git a/ci/local/build.bash b/ci/local/build.bash deleted file mode 100755 index 60d22dea..00000000 --- a/ci/local/build.bash +++ /dev/null @@ -1,215 +0,0 @@ -#! /usr/bin/env bash - -# Copyright (c) 2018-2020 NVIDIA Corporation -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# Released under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. - -################################################################################ -# NVBench local containerized build script -################################################################################ - -function usage { - echo "Usage: ${0} [flags...] [cmake-targets...]" - echo - echo "Build and test your local repository using a gpuCI Docker image." - echo "If CMake targets are specified, only those targets are built and tested." - echo "Otherwise, everything is built and tested." - echo - echo "-h, -help, --help" - echo " Print this message." - echo - echo "-r , --repository " - echo " Path to the repository (default: ${REPOSITORY_PATH})." - echo - echo "-i , --image " - echo " Docker image to use (default: ${IMAGE})" - echo - echo "-l, --local-image" - echo " Use the local version of the image instead of pulling from Docker hub." - echo - echo "-s, --shell-only" - echo " Skip building and testing and launch an interactive shell instead." - echo - echo "-d, --disable-gpus" - echo " Don't start the container with the NVIDIA runtime and GPUs attached." - echo - echo "-c, --clean" - echo " If the build directory already exists, delete it." - echo - echo "-j , --jobs " - echo " Number of threads to use when building (default: inferred)." - echo - echo "-b , --cmake-build-type " - echo " CMake build type to use, either Release, RelWithDebInfo, or Debug" - echo " (default: ${CMAKE_BUILD_TYPE})." - echo - - exit -3 -} - -SCRIPT_PATH=$(cd $(dirname ${0}); pwd -P) - -REPOSITORY_PATH=$(realpath ${SCRIPT_PATH}/../..) - -################################################################################ -# FLAGS - Process command line flags. -################################################################################ - -IMAGE="gpuci/cccl:cuda11.5.1-devel-ubuntu20.04-gcc9" - -LOCAL_IMAGE=0 - -SHELL_ONLY=0 - -BUILD_TYPE="gpu" - -CLEAN=0 - -PARALLEL_LEVEL="" - -CMAKE_BUILD_TYPE="Release" - -TARGETS="" - -while test ${#} != 0 -do - case "${1}" in - -h) ;& - -help) ;& - --help) usage ;; - -r) ;& - --repository) - shift # The next argument is the path. - REPOSITORY_PATH="${1}" - ;; - -i) ;& - --image) - shift # The next argument is the image. - IMAGE="${1}" - ;; - -l) ;& - --local-image) LOCAL_IMAGE=1 ;; - -s) ;& - --shell-only) SHELL_ONLY=1 ;; - -d) ;& - --disable-gpus) BUILD_TYPE="cpu" ;; - -c) ;& - --clean) CLEAN=1 ;; - -j) ;& - --jobs) - shift # The next argument is the number of threads. - PARALLEL_LEVEL="${1}" - ;; - -b) ;& - --cmake-build-type) - shift # The next argument is the build type. - CMAKE_BUILD_TYPE="${1}" - ;; - *) - TARGETS="${TARGETS:+${TARGETS} }${1}" - ;; - esac - shift -done - -################################################################################ -# PATHS - Setup paths for the container. -################################################################################ - -# ${REPOSITORY_PATH} is the local filesystem path to the Git repository being -# built and tested. It can be set with the --repository flag. -# -# ${BUILD_PATH} is the local filesystem path that will be used for the build. It -# is named after the image name, allowing multiple image builds to coexist on -# the local filesystem. -# -# ${REPOSITORY_PATH_IN_CONTAINER} is the location of ${REPOSITORY_PATH} inside -# the container. -# -# ${BUILD_PATH_IN_CONTAINER} is the location of ${BUILD_PATH} inside the -# container. - -BUILD_PATH=${REPOSITORY_PATH}/build_$(echo "$(basename "${IMAGE}")" | sed -e 's/:/_/g' | sed -e 's/-/_/g') - -if [[ "${CLEAN}" != 0 ]]; then - rm -rf ${BUILD_PATH} -fi - -mkdir -p ${BUILD_PATH} - -BASE_PATH_IN_CONTAINER="/cccl" - -REPOSITORY_PATH_IN_CONTAINER="${BASE_PATH_IN_CONTAINER}/$(basename "${REPOSITORY_PATH}")" - -BUILD_PATH_IN_CONTAINER="${BASE_PATH_IN_CONTAINER}/$(basename "${REPOSITORY_PATH}")/build" - -################################################################################ -# ENVIRONMENT - Setup the thunk build script that will be run by the container. -################################################################################ - -# We have to run `ldconfig` to rebuild `ld.so.cache` to work around this -# failure on Debian: https://github.com/NVIDIA/nvidia-docker/issues/1399 - -COMMAND="sudo ldconfig; sudo ldconfig" -if [[ "${SHELL_ONLY}" != 0 ]]; then - COMMAND="${COMMAND}; bash" -else - COMMAND="${COMMAND}; ${REPOSITORY_PATH_IN_CONTAINER}/ci/common/build.bash ${TARGETS} || bash" -fi - -################################################################################ -# GPU - Setup GPUs. -################################################################################ - -# Note: We always start docker with --gpus, even for cpu builds. Otherwise -# libcuda.so.1 is not present and no NVBench tests are able to run. - -# Limit GPUs available to the container based on ${CUDA_VISIBLE_DEVICES}. -if [[ -z "${CUDA_VISIBLE_DEVICES}" ]]; then - VISIBLE_DEVICES="all" -else - VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES}" -fi - -DOCKER_MAJOR_VER=$(docker -v | sed 's/[^[0-9]*\([0-9]*\).*/\1/') -GPU_OPTS="--gpus device=${VISIBLE_DEVICES}" -if [[ "${DOCKER_MAJOR_VER}" -lt 19 ]] -then - GPU_OPTS="--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES='${VISIBLE_DEVICES}'" -fi - -################################################################################ -# LAUNCH - Pull and launch the container. -################################################################################ - -#NVIDIA_DOCKER_INSTALLED=$(docker info 2>&1 | grep -i runtime | grep -c nvidia) -NVIDIA_DOCKER_INSTALLED=1 # Broken on WSL -if [[ "${NVIDIA_DOCKER_INSTALLED}" == 0 ]]; then - echo "NVIDIA Docker not found, please install it: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-docker-ce" - exit -4 -fi - -if [[ "${LOCAL_IMAGE}" == 0 ]]; then - docker pull "${IMAGE}" -fi - -docker run --rm -it ${GPU_OPTS} \ - --cap-add=SYS_PTRACE \ - --user "$(id -u)":"$(id -g)" \ - -v "${REPOSITORY_PATH}":"${REPOSITORY_PATH_IN_CONTAINER}" \ - -v "${BUILD_PATH}":"${BUILD_PATH_IN_CONTAINER}" \ - -v /etc/passwd:/etc/passwd:ro \ - -v /etc/group:/etc/group:ro \ - -v /etc/subuid:/etc/subuid:ro \ - -v /etc/subgid:/etc/subgid:ro \ - -v /etc/shadow:/etc/shadow:ro \ - -v /etc/gshadow:/etc/gshadow:ro \ - -e "WORKSPACE=${REPOSITORY_PATH_IN_CONTAINER}" \ - -e "BUILD_TYPE=${BUILD_TYPE}" \ - -e "CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}" \ - -e "COVERAGE_PLAN=${COVERAGE_PLAN}" \ - -e "PARALLEL_LEVEL=${PARALLEL_LEVEL}" \ - -w "${BUILD_PATH_IN_CONTAINER}" \ - "${IMAGE}" bash -c "${COMMAND}" - diff --git a/ci/matrix.yaml b/ci/matrix.yaml new file mode 100644 index 00000000..6da2d332 --- /dev/null +++ b/ci/matrix.yaml @@ -0,0 +1,86 @@ + +cuda_prev_min: &cuda_prev_min '11.1' # Does not support the CUPTI APIs we use (added in 11.3) +cuda_prev_max: &cuda_prev_max '11.8' +cuda_curr_min: &cuda_curr_min '12.0' +cuda_curr_max: &cuda_curr_max '12.6' + +# The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers +devcontainer_version: '24.12' + +# gcc compiler configurations +gcc7: &gcc7 { name: 'gcc', version: '7', exe: 'g++' } +gcc8: &gcc8 { name: 'gcc', version: '8', exe: 'g++' } +gcc9: &gcc9 { name: 'gcc', version: '9', exe: 'g++' } +gcc10: &gcc10 { name: 'gcc', version: '10', exe: 'g++' } +gcc11: &gcc11 { name: 'gcc', version: '11', exe: 'g++' } +gcc12: &gcc12 { name: 'gcc', version: '12', exe: 'g++' } +gcc12: &gcc13 { name: 'gcc', version: '13', exe: 'g++' } + +# LLVM Compiler configurations +llvm9: &llvm9 { name: 'llvm', version: '9', exe: 'clang++' } +llvm10: &llvm10 { name: 'llvm', version: '10', exe: 'clang++' } +llvm11: &llvm11 { name: 'llvm', version: '11', exe: 'clang++' } +llvm12: &llvm12 { name: 'llvm', version: '12', exe: 'clang++' } +llvm13: &llvm13 { name: 'llvm', version: '13', exe: 'clang++' } +llvm14: &llvm14 { name: 'llvm', version: '14', exe: 'clang++' } +llvm15: &llvm15 { name: 'llvm', version: '15', exe: 'clang++' } +llvm16: &llvm16 { name: 'llvm', version: '16', exe: 'clang++' } +llvm16: &llvm17 { name: 'llvm', version: '17', exe: 'clang++' } +llvm16: &llvm18 { name: 'llvm', version: '18', exe: 'clang++' } + +# MSVC configs +msvc2019: &msvc2019 { name: 'cl', version: '14.29', exe: 'cl++' } +msvc2022: &msvc2022 { name: 'cl', version: '14.39', exe: 'cl++' } + +# Each environment below will generate a unique build/test job +# See the "compute-matrix" job in the workflow for how this is parsed and used +# cuda: The CUDA Toolkit version +# os: The operating system used +# cpu: The CPU architecture +# compiler: The compiler to use +# name: The compiler name +# version: The compiler version +# exe: The unverionsed compiler binary name +# std: The C++ standards to build for +# This field is unique as it will generate an independent build/test job for each value + +# Configurations that will run for every PR +pull_request: + nvcc: + - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc7 } + - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc8 } + - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc9 } + - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm9 } + - {cuda: *cuda_prev_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10 } + - {cuda: *cuda_curr_min, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11 } + - {cuda: *cuda_curr_min, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13 } + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10 } + - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11 } + - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12 } + # Fails to compile simple input on CTK12.4. Try to add later. + # {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc13 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13 } + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14 } + - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm15 } + - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm16 } + - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm17 } + - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm18, extra_build_args: "-cmake-options '-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler'"} + - {cuda: *cuda_curr_max, os: 'windows2022', cpu: 'amd64', compiler: *msvc2019 } + - {cuda: *cuda_curr_max, os: 'windows2022', cpu: 'amd64', compiler: *msvc2022 } diff --git a/ci/ninja_summary.py b/ci/ninja_summary.py new file mode 100755 index 00000000..f496db53 --- /dev/null +++ b/ci/ninja_summary.py @@ -0,0 +1,381 @@ +#!/usr/bin/env python3 +# Copyright (c) 2018 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +r"""Summarize the last ninja build, invoked with ninja's -C syntax. + +This script is designed to be automatically run after each ninja build in +order to summarize the build's performance. Making build performance information +more visible should make it easier to notice anomalies and opportunities. To use +this script on Windows just set NINJA_SUMMARIZE_BUILD=1 and run autoninja.bat. + +On Linux you can get autoninja to invoke this script using this syntax: + +$ NINJA_SUMMARIZE_BUILD=1 autoninja -C out/Default/ chrome + +You can also call this script directly using ninja's syntax to specify the +output directory of interest: + +> python3 post_build_ninja_summary.py -C out/Default + +Typical output looks like this: + +>ninja -C out\debug_component base +ninja.exe -C out\debug_component base -j 960 -l 48 -d keeprsp +ninja: Entering directory `out\debug_component' +[1 processes, 1/1 @ 0.3/s : 3.092s ] Regenerating ninja files +Longest build steps: + 0.1 weighted s to build obj/base/base/trace_log.obj (6.7 s elapsed time) + 0.2 weighted s to build nasm.exe, nasm.exe.pdb (0.2 s elapsed time) + 0.3 weighted s to build obj/base/base/win_util.obj (12.4 s elapsed time) + 1.2 weighted s to build base.dll, base.dll.lib (1.2 s elapsed time) +Time by build-step type: + 0.0 s weighted time to generate 6 .lib files (0.3 s elapsed time sum) + 0.1 s weighted time to generate 25 .stamp files (1.2 s elapsed time sum) + 0.2 s weighted time to generate 20 .o files (2.8 s elapsed time sum) + 1.7 s weighted time to generate 4 PEFile (linking) files (2.0 s elapsed +time sum) + 23.9 s weighted time to generate 770 .obj files (974.8 s elapsed time sum) +26.1 s weighted time (982.9 s elapsed time sum, 37.7x parallelism) +839 build steps completed, average of 32.17/s + +If no gn clean has been done then results will be for the last non-NULL +invocation of ninja. Ideas for future statistics, and implementations are +appreciated. + +The "weighted" time is the elapsed time of each build step divided by the number +of tasks that were running in parallel. This makes it an excellent approximation +of how "important" a slow step was. A link that is entirely or mostly serialized +will have a weighted time that is the same or similar to its elapsed time. A +compile that runs in parallel with 999 other compiles will have a weighted time +that is tiny.""" + +import argparse +import errno +import fnmatch +import os +import subprocess +import sys + +# The number of long build times to report: +long_count = 10 +# The number of long times by extension to report +long_ext_count = 10 + + +class Target: + """Represents a single line read for a .ninja_log file.""" + def __init__(self, start, end): + """Creates a target object by passing in the start/end times in seconds + as a float.""" + self.start = start + self.end = end + # A list of targets, appended to by the owner of this object. + self.targets = [] + self.weighted_duration = 0.0 + + def Duration(self): + """Returns the task duration in seconds as a float.""" + return self.end - self.start + + def SetWeightedDuration(self, weighted_duration): + """Sets the duration, in seconds, passed in as a float.""" + self.weighted_duration = weighted_duration + + def WeightedDuration(self): + """Returns the task's weighted duration in seconds as a float. + + Weighted_duration takes the elapsed time of the task and divides it + by how many other tasks were running at the same time. Thus, it + represents the approximate impact of this task on the total build time, + with serialized or serializing steps typically ending up with much + longer weighted durations. + weighted_duration should always be the same or shorter than duration. + """ + # Allow for modest floating-point errors + epsilon = 0.000002 + if (self.weighted_duration > self.Duration() + epsilon): + print('%s > %s?' % (self.weighted_duration, self.Duration())) + assert (self.weighted_duration <= self.Duration() + epsilon) + return self.weighted_duration + + def DescribeTargets(self): + """Returns a printable string that summarizes the targets.""" + # Some build steps generate dozens of outputs - handle them sanely. + # The max_length was chosen so that it can fit most of the long + # single-target names, while minimizing word wrapping. + result = ', '.join(self.targets) + max_length = 65 + if len(result) > max_length: + result = result[:max_length] + '...' + return result + + +# Copied with some modifications from ninjatracing +def ReadTargets(log, show_all): + """Reads all targets from .ninja_log file |log_file|, sorted by duration. + + The result is a list of Target objects.""" + header = log.readline() + # Handle empty ninja_log gracefully by silently returning an empty list of + # targets. + if not header: + return [] + assert header == '# ninja log v5\n', \ + 'unrecognized ninja log version %r' % header + targets_dict = {} + last_end_seen = 0.0 + for line in log: + parts = line.strip().split('\t') + if len(parts) != 5: + # If ninja.exe is rudely halted then the .ninja_log file may be + # corrupt. Silently continue. + continue + start, end, _, name, cmdhash = parts # Ignore restat. + # Convert from integral milliseconds to float seconds. + start = int(start) / 1000.0 + end = int(end) / 1000.0 + if not show_all and end < last_end_seen: + # An earlier time stamp means that this step is the first in a new + # build, possibly an incremental build. Throw away the previous + # data so that this new build will be displayed independently. + # This has to be done by comparing end times because records are + # written to the .ninja_log file when commands complete, so end + # times are guaranteed to be in order, but start times are not. + targets_dict = {} + target = None + if cmdhash in targets_dict: + target = targets_dict[cmdhash] + if not show_all and (target.start != start or target.end != end): + # If several builds in a row just run one or two build steps + # then the end times may not go backwards so the last build may + # not be detected as such. However in many cases there will be a + # build step repeated in the two builds and the changed + # start/stop points for that command, identified by the hash, + # can be used to detect and reset the target dictionary. + targets_dict = {} + target = None + if not target: + targets_dict[cmdhash] = target = Target(start, end) + last_end_seen = end + target.targets.append(name) + return list(targets_dict.values()) + + +def GetExtension(target, extra_patterns): + """Return the file extension that best represents a target. + + For targets that generate multiple outputs it is important to return a + consistent 'canonical' extension. Ultimately the goal is to group build steps + by type.""" + for output in target.targets: + if extra_patterns: + for fn_pattern in extra_patterns.split(';'): + if fnmatch.fnmatch(output, '*' + fn_pattern + '*'): + return fn_pattern + # Not a true extension, but a good grouping. + if output.endswith('type_mappings'): + extension = 'type_mappings' + break + + # Capture two extensions if present. For example: file.javac.jar should + # be distinguished from file.interface.jar. + root, ext1 = os.path.splitext(output) + _, ext2 = os.path.splitext(root) + extension = ext2 + ext1 # Preserve the order in the file name. + + if len(extension) == 0: + extension = '(no extension found)' + + if ext1 in ['.pdb', '.dll', '.exe']: + extension = 'PEFile (linking)' + # Make sure that .dll and .exe are grouped together and that the + # .dll.lib files don't cause these to be listed as libraries + break + if ext1 in ['.so', '.TOC']: + extension = '.so (linking)' + # Attempt to identify linking, avoid identifying as '.TOC' + break + # Make sure .obj files don't get categorized as mojo files + if ext1 in ['.obj', '.o']: + break + # Jars are the canonical output of java targets. + if ext1 == '.jar': + break + # Normalize all mojo related outputs to 'mojo'. + if output.count('.mojom') > 0: + extension = 'mojo' + break + return extension + + +def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting): + """Print a summary of the passed in list of Target objects.""" + + # Create a list that is in order by time stamp and has entries for the + # beginning and ending of each build step (one time stamp may have multiple + # entries due to multiple steps starting/stopping at exactly the same time). + # Iterate through this list, keeping track of which tasks are running at all + # times. At each time step calculate a running total for weighted time so + # that when each task ends its own weighted time can easily be calculated. + task_start_stop_times = [] + + earliest = -1 + latest = 0 + total_cpu_time = 0 + for target in entries: + if earliest < 0 or target.start < earliest: + earliest = target.start + if target.end > latest: + latest = target.end + total_cpu_time += target.Duration() + task_start_stop_times.append((target.start, 'start', target)) + task_start_stop_times.append((target.end, 'stop', target)) + length = latest - earliest + weighted_total = 0.0 + + # Sort by the time/type records and ignore |target| + task_start_stop_times.sort(key=lambda times: times[:2]) + # Now we have all task start/stop times sorted by when they happen. If a + # task starts and stops on the same time stamp then the start will come + # first because of the alphabet, which is important for making this work + # correctly. + # Track the tasks which are currently running. + running_tasks = {} + # Record the time we have processed up to so we know how to calculate time + # deltas. + last_time = task_start_stop_times[0][0] + # Track the accumulated weighted time so that it can efficiently be added + # to individual tasks. + last_weighted_time = 0.0 + # Scan all start/stop events. + for event in task_start_stop_times: + time, action_name, target = event + # Accumulate weighted time up to now. + num_running = len(running_tasks) + if num_running > 0: + # Update the total weighted time up to this moment. + last_weighted_time += (time - last_time) / float(num_running) + if action_name == 'start': + # Record the total weighted task time when this task starts. + running_tasks[target] = last_weighted_time + if action_name == 'stop': + # Record the change in the total weighted task time while this task + # ran. + weighted_duration = last_weighted_time - running_tasks[target] + target.SetWeightedDuration(weighted_duration) + weighted_total += weighted_duration + del running_tasks[target] + last_time = time + assert (len(running_tasks) == 0) + + # Warn if the sum of weighted times is off by more than half a second. + if abs(length - weighted_total) > 500: + print('Warning: Possible corrupt ninja log, results may be ' + 'untrustworthy. Length = %.3f, weighted total = %.3f' % + (length, weighted_total)) + + # Print the slowest build steps: + print(' Longest build steps:') + if elapsed_time_sorting: + entries.sort(key=lambda x: x.Duration()) + else: + entries.sort(key=lambda x: x.WeightedDuration()) + for target in entries[-long_count:]: + print(' %8.1f weighted s to build %s (%.1f s elapsed time)' % + (target.WeightedDuration(), target.DescribeTargets(), + target.Duration())) + + # Sum up the time by file extension/type of the output file + count_by_ext = {} + time_by_ext = {} + weighted_time_by_ext = {} + # Scan through all of the targets to build up per-extension statistics. + for target in entries: + extension = GetExtension(target, extra_step_types) + time_by_ext[extension] = time_by_ext.get(extension, + 0) + target.Duration() + weighted_time_by_ext[extension] = weighted_time_by_ext.get( + extension, 0) + target.WeightedDuration() + count_by_ext[extension] = count_by_ext.get(extension, 0) + 1 + + print(' Time by build-step type:') + # Copy to a list with extension name and total time swapped, to (time, ext) + if elapsed_time_sorting: + weighted_time_by_ext_sorted = sorted( + (y, x) for (x, y) in time_by_ext.items()) + else: + weighted_time_by_ext_sorted = sorted( + (y, x) for (x, y) in weighted_time_by_ext.items()) + # Print the slowest build target types: + for time, extension in weighted_time_by_ext_sorted[-long_ext_count:]: + print( + ' %8.1f s weighted time to generate %d %s files ' + '(%1.1f s elapsed time sum)' % + (time, count_by_ext[extension], extension, time_by_ext[extension])) + + print(' %.1f s weighted time (%.1f s elapsed time sum, %1.1fx ' + 'parallelism)' % + (length, total_cpu_time, total_cpu_time * 1.0 / length)) + print(' %d build steps completed, average of %1.2f/s' % + (len(entries), len(entries) / (length))) + + +def main(): + log_file = '.ninja_log' + metrics_file = 'siso_metrics.json' + parser = argparse.ArgumentParser() + parser.add_argument('-C', dest='build_directory', help='Build directory.') + parser.add_argument( + '-s', + '--step-types', + help='semicolon separated fnmatch patterns for build-step grouping') + parser.add_argument( + '-e', + '--elapsed_time_sorting', + default=False, + action='store_true', + help='Sort output by elapsed time instead of weighted time') + parser.add_argument('--log-file', + help="specific ninja log file to analyze.") + args, _extra_args = parser.parse_known_args() + if args.build_directory: + log_file = os.path.join(args.build_directory, log_file) + metrics_file = os.path.join(args.build_directory, metrics_file) + if args.log_file: + log_file = args.log_file + if not args.step_types: + # Offer a convenient way to add extra step types automatically, + # including when this script is run by autoninja. get() returns None if + # the variable isn't set. + args.step_types = os.environ.get('chromium_step_types') + if args.step_types: + # Make room for the extra build types. + global long_ext_count + long_ext_count += len(args.step_types.split(';')) + + if os.path.exists(metrics_file): + # Automatically handle summarizing siso builds. + cmd = ['siso.bat' if 'win32' in sys.platform else 'siso'] + cmd.extend(['metrics', 'summary']) + if args.build_directory: + cmd.extend(['-C', args.build_directory]) + if args.step_types: + cmd.extend(['--step_types', args.step_types]) + if args.elapsed_time_sorting: + cmd.append('--elapsed_time_sorting') + subprocess.run(cmd) + else: + try: + with open(log_file, 'r') as log: + entries = ReadTargets(log, False) + if entries: + SummarizeEntries(entries, args.step_types, + args.elapsed_time_sorting) + except IOError: + print('Log file %r not found, no build summary created.' % log_file) + return errno.ENOENT + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/ci/pretty_printing.sh b/ci/pretty_printing.sh new file mode 100644 index 00000000..5bea1af9 --- /dev/null +++ b/ci/pretty_printing.sh @@ -0,0 +1,105 @@ +# Print "ARG=${ARG}" for all args. +function print_var_values() { + # Iterate through the arguments + for var_name in "$@"; do + if [ -z "$var_name" ]; then + echo "Usage: print_var_values ..." + return 1 + fi + + # Dereference the variable and print the result + echo "$var_name=${!var_name:-(undefined)}" + done +} + +# begin_group: Start a named section of log output, possibly with color. +# Usage: begin_group "Group Name" [Color] +# Group Name: A string specifying the name of the group. +# Color (optional): ANSI color code to set text color. Default is blue (1;34). +function begin_group() { + # See options for colors here: https://gist.github.com/JBlond/2fea43a3049b38287e5e9cefc87b2124 + local blue="34" + local name="${1:-}" + local color="${2:-$blue}" + + if [ -n "${GITHUB_ACTIONS:-}" ]; then + echo -e "::group::\e[${color}m${name}\e[0m" + else + echo -e "\e[${color}m================== ${name} ======================\e[0m" + fi +} + +# end_group: End a named section of log output and print status based on exit status. +# Usage: end_group "Group Name" [Exit Status] +# Group Name: A string specifying the name of the group. +# Exit Status (optional): The exit status of the command run within the group. Default is 0. +function end_group() { + local name="${1:-}" + local build_status="${2:-0}" + local duration="${3:-}" + local red="31" + local blue="34" + + if [ -n "${GITHUB_ACTIONS:-}" ]; then + echo "::endgroup::" + + if [ "$build_status" -ne 0 ]; then + echo -e "::error::\e[${red}m ${name} - Failed (⬆️ click above for full log ⬆️)\e[0m" + fi + else + if [ "$build_status" -ne 0 ]; then + echo -e "\e[${red}m================== End ${name} - Failed${duration:+ - Duration: ${duration}s} ==================\e[0m" + else + echo -e "\e[${blue}m================== End ${name} - Success${duration:+ - Duration: ${duration}s} ==================\n\e[0m" + fi + fi +} + +declare -A command_durations + +# Runs a command within a named group, handles the exit status, and prints appropriate messages based on the result. +# Usage: run_command "Group Name" command [arguments...] +function run_command() { + local group_name="${1:-}" + shift + local command=("$@") + local status + + begin_group "$group_name" + set +e + local start_time=$(date +%s) + "${command[@]}" + status=$? + local end_time=$(date +%s) + set -e + local duration=$((end_time - start_time)) + end_group "$group_name" $status $duration + command_durations["$group_name"]=$duration + return $status +} + +function string_width() { + local str="$1" + echo "$str" | awk '{print length}' +} + +function print_time_summary() { + local max_length=0 + local group + + # Find the longest group name for formatting + for group in "${!command_durations[@]}"; do + local group_length=$(echo "$group" | awk '{print length}') + if [ "$group_length" -gt "$max_length" ]; then + max_length=$group_length + fi + done + + echo "Time Summary:" + for group in "${!command_durations[@]}"; do + printf "%-${max_length}s : %s seconds\n" "$group" "${command_durations[$group]}" + done + + # Clear the array of timing info + declare -gA command_durations=() +} diff --git a/ci/sccache_hit_rate.sh b/ci/sccache_hit_rate.sh new file mode 100755 index 00000000..de8ae465 --- /dev/null +++ b/ci/sccache_hit_rate.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +set -euo pipefail + +# Ensure two arguments are provided +if [ $# -ne 2 ]; then + echo "Usage: $0 " >&2 + exit 1 +fi + +# Print the contents of the before file +echo "=== Contents of $1 ===" >&2 +cat $1 >&2 +echo "=== End of $1 ===" >&2 + +# Print the contents of the after file +echo "=== Contents of $2 ===" >&2 +cat $2 >&2 +echo "=== End of $2 ===" >&2 + +# Extract compile requests and cache hits from the before and after files +requests_before=$(awk '/^[ \t]*Compile requests[ \t]+[0-9]+/ {print $3}' "$1") +hits_before=$(awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}' "$1") +requests_after=$(awk '/^[ \t]*Compile requests[ \t]+[0-9]+/ {print $3}' "$2") +hits_after=$(awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}' "$2") + +# Calculate the differences to find out how many new requests and hits +requests_diff=$((requests_after - requests_before)) +hits_diff=$((hits_after - hits_before)) + +echo "New Compile Requests: $requests_diff" >&2 +echo "New Hits: $hits_diff" >&2 + +# Calculate and print the hit rate +if [ $requests_diff -eq 0 ]; then + echo "No new compile requests, hit rate is not applicable" +else + hit_rate=$(awk -v hits=$hits_diff -v requests=$requests_diff 'BEGIN {printf "%.2f", hits/requests * 100}') + echo "sccache hit rate: $hit_rate%" >&2 + echo "$hit_rate" +fi diff --git a/ci/sccache_stats.sh b/ci/sccache_stats.sh new file mode 100755 index 00000000..3a3ebc42 --- /dev/null +++ b/ci/sccache_stats.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +# This script prints the sccache hit rate between two calls to sccache --show-stats. +# It should be sourced in your script before and after the operations you want to profile, +# with the 'start' or 'end' argument respectively. + +mode=$1 + +if [[ "$mode" != "start" && "$mode" != "end" ]]; then + echo "Invalid mode: $mode" + echo "Usage: $0 {start|end}" + exit 1 +fi + +# Check if sccache is available +if ! command -v sccache &> /dev/null; then + echo "Notice: sccache is not available. Skipping..." + exit 0 +fi + +case $mode in + start) + export SCCACHE_START_HITS=$(sccache --show-stats | awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}') + export SCCACHE_START_MISSES=$(sccache --show-stats | awk '/^[ \t]*Cache misses[ \t]+[0-9]+/ {print $3}') + ;; + end) + if [[ -z ${SCCACHE_START_HITS+x} || -z ${SCCACHE_START_MISSES+x} ]]; then + echo "Error: start stats not collected. Did you call this script with 'start' before your operations?" + exit 1 + fi + + final_hits=$(sccache --show-stats | awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}') + final_misses=$(sccache --show-stats | awk '/^[ \t]*Cache misses[ \t]+[0-9]+/ {print $3}') + hits=$((final_hits - SCCACHE_START_HITS)) + misses=$((final_misses - SCCACHE_START_MISSES)) + total=$((hits + misses)) + + prefix="" + if [ ${GITHUB_ACTIONS:-false} = "true" ]; then + prefix="::notice::" + fi + + if (( total > 0 )); then + hit_rate=$(awk -v hits="$hits" -v total="$total" 'BEGIN { printf "%.2f", (hits / total) * 100 }') + echo ${prefix}"sccache hits: $hits | misses: $misses | hit rate: $hit_rate%" + else + echo ${prefix}"sccache stats: N/A No new compilation requests" + fi + unset SCCACHE_START_HITS + unset SCCACHE_START_MISSES + ;; +esac diff --git a/ci/test_nvbench.sh b/ci/test_nvbench.sh new file mode 100755 index 00000000..40559eda --- /dev/null +++ b/ci/test_nvbench.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +source "$(dirname "$0")/build_common.sh" + +# Run NVBench tests with high parallelism. If any need to be +# serialized, define the `RUN_SERIAL` CMake property on the +# test. +export CTEST_PARALLEL_LEVEL=${PARALLEL_LEVEL} + +print_environment_details + +./build_nvbench.sh "$@" + +PRESET="nvbench-ci" + +test_preset "NVBench" ${PRESET} + +print_time_summary diff --git a/ci/windows/build_common.psm1 b/ci/windows/build_common.psm1 new file mode 100644 index 00000000..1edea634 --- /dev/null +++ b/ci/windows/build_common.psm1 @@ -0,0 +1,207 @@ + +Param( + [Parameter(Mandatory = $true)] + [Alias("std")] + [ValidateNotNullOrEmpty()] + [ValidateSet(17)] + [int]$CXX_STANDARD = 17 +) + +# We need the full path to cl because otherwise cmake will replace CMAKE_CXX_COMPILER with the full path +# and keep CMAKE_CUDA_HOST_COMPILER at "cl" which breaks our cmake script +$script:HOST_COMPILER = (Get-Command "cl").source -replace '\\','/' +$script:PARALLEL_LEVEL = (Get-WmiObject -class Win32_processor).NumberOfLogicalProcessors + +# Extract the CL version for export to build scripts: +$script:CL_VERSION_STRING = & cl.exe /? +if ($script:CL_VERSION_STRING -match "Version (\d+\.\d+)\.\d+") { + $CL_VERSION = [version]$matches[1] + Write-Host "Detected cl.exe version: $CL_VERSION" +} + +if (-not $env:CCCL_BUILD_INFIX) { + $env:CCCL_BUILD_INFIX = "" +} + +# Presets will be configured in this directory: +$BUILD_DIR = "../build/$env:CCCL_BUILD_INFIX" + +If(!(test-path -PathType container "../build")) { + New-Item -ItemType Directory -Path "../build" +} + +# The most recent build will always be symlinked to cccl/build/latest +New-Item -ItemType Directory -Path "$BUILD_DIR" -Force + +# Prepare environment for CMake: +$env:CMAKE_BUILD_PARALLEL_LEVEL = $PARALLEL_LEVEL +$env:CTEST_PARALLEL_LEVEL = 1 +$env:CUDAHOSTCXX = $HOST_COMPILER.FullName +$env:CXX = $HOST_COMPILER.FullName + +Write-Host "========================================" +Write-Host "Begin build" +Write-Host "pwd=$pwd" +Write-Host "BUILD_DIR=$BUILD_DIR" +Write-Host "CXX_STANDARD=$CXX_STANDARD" +Write-Host "CXX=$env:CXX" +Write-Host "CUDACXX=$env:CUDACXX" +Write-Host "CUDAHOSTCXX=$env:CUDAHOSTCXX" +Write-Host "NVCC_VERSION=$NVCC_VERSION" +Write-Host "CMAKE_BUILD_PARALLEL_LEVEL=$env:CMAKE_BUILD_PARALLEL_LEVEL" +Write-Host "CTEST_PARALLEL_LEVEL=$env:CTEST_PARALLEL_LEVEL" +Write-Host "CCCL_BUILD_INFIX=$env:CCCL_BUILD_INFIX" +Write-Host "Current commit is:" +Write-Host "$(git log -1)" +Write-Host "========================================" + +function configure_preset { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$BUILD_NAME, + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$PRESET, + [Parameter(Mandatory = $true)] + [AllowEmptyString()] + [string]$CMAKE_OPTIONS + ) + + $step = "$BUILD_NAME (configure)" + + # CMake must be invoked in the same directory as the presets file: + pushd ".." + + $cmake_command = "cmake --preset $PRESET $CMAKE_OPTIONS --log-level VERBOSE" + echo "$cmake_command" + Invoke-Expression $cmake_command + $test_result = $LastExitCode + + If ($test_result -ne 0) { + throw "$step Failed" + } + + popd + Write-Host "$step complete." +} + +function build_preset { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$BUILD_NAME, + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$PRESET + ) + + $step = "$BUILD_NAME (build)" + + # CMake must be invoked in the same directory as the presets file: + pushd ".." + + sccache_stats('Start') + + cmake --build --preset $PRESET -v + $test_result = $LastExitCode + + sccache_stats('Stop') + + echo "$step complete" + + If ($test_result -ne 0) { + throw "$step Failed" + } + + popd +} + +function test_preset { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$BUILD_NAME, + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$PRESET + ) + + $step = "$BUILD_NAME (test)" + + # CTest must be invoked in the same directory as the presets file: + pushd ".." + + sccache_stats('Start') + + ctest --preset $PRESET + $test_result = $LastExitCode + + sccache_stats('Stop') + + echo "$step complete" + + If ($test_result -ne 0) { + throw "$step Failed" + } + + popd +} + +function configure_and_build_preset { + Param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$BUILD_NAME, + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$PRESET, + [Parameter(Mandatory = $true)] + [AllowEmptyString()] + [string]$CMAKE_OPTIONS + ) + + configure_preset "$BUILD_NAME" "$PRESET" "$CMAKE_OPTIONS" + build_preset "$BUILD_NAME" "$PRESET" +} + +function sccache_stats { + Param ( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [ValidateSet('Start','Stop')] + [string]$MODE + ) + + $sccache_stats = sccache -s + If($MODE -eq 'Start') { + [int]$script:sccache_compile_requests = ($sccache_stats[0] -replace '[^\d]+') + [int]$script:sccache_cache_hits_cpp = ($sccache_stats[2] -replace '[^\d]+') + [int]$script:sccache_cache_hits_cuda = ($sccache_stats[3] -replace '[^\d]+') + [int]$script:sccache_cache_miss_cpp = ($sccache_stats[5] -replace '[^\d]+') + [int]$script:sccache_cache_miss_cuda = ($sccache_stats[6] -replace '[^\d]+') + } else { + [int]$final_sccache_compile_requests = ($sccache_stats[0] -replace '[^\d]+') + [int]$final_sccache_cache_hits_cpp = ($sccache_stats[2] -replace '[^\d]+') + [int]$final_sccache_cache_hits_cuda = ($sccache_stats[3] -replace '[^\d]+') + [int]$final_sccache_cache_miss_cpp = ($sccache_stats[5] -replace '[^\d]+') + [int]$final_sccache_cache_miss_cuda = ($sccache_stats[6] -replace '[^\d]+') + + [int]$total_requests = $final_sccache_compile_requests - $script:sccache_compile_requests + [int]$total_hits_cpp = $final_sccache_cache_hits_cpp - $script:sccache_cache_hits_cpp + [int]$total_hits_cuda = $final_sccache_cache_hits_cuda - $script:sccache_cache_hits_cuda + [int]$total_miss_cpp = $final_sccache_cache_miss_cpp - $script:sccache_cache_miss_cpp + [int]$total_miss_cuda = $final_sccache_cache_miss_cuda - $script:sccache_cache_miss_cuda + If ( $total_requests -gt 0 ) { + [int]$hit_rate_cpp = $total_hits_cpp / $total_requests * 100; + [int]$hit_rate_cuda = $total_hits_cuda / $total_requests * 100; + echo "sccache hits cpp: $total_hits_cpp `t| misses: $total_miss_cpp `t| hit rate: $hit_rate_cpp%" + echo "sccache hits cuda: $total_hits_cuda `t| misses: $total_miss_cuda `t| hit rate: $hit_rate_cuda%" + } else { + echo "sccache stats: N/A No new compilation requests" + } + } +} + +Export-ModuleMember -Function configure_preset, build_preset, test_preset, configure_and_build_preset, sccache_stats +Export-ModuleMember -Variable BUILD_DIR, CL_VERSION diff --git a/ci/windows/build_nvbench.ps1 b/ci/windows/build_nvbench.ps1 new file mode 100644 index 00000000..7240698c --- /dev/null +++ b/ci/windows/build_nvbench.ps1 @@ -0,0 +1,30 @@ + +Param( + [Parameter(Mandatory = $false)] + [Alias("cmake-options")] + [ValidateNotNullOrEmpty()] + [string]$ARG_CMAKE_OPTIONS = "" +) + +$CURRENT_PATH = Split-Path $pwd -leaf +If($CURRENT_PATH -ne "ci") { + Write-Host "Moving to ci folder" + pushd "$PSScriptRoot/.." +} + +Remove-Module -Name build_common +Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList 17 + +$PRESET = "nvbench-ci" +$CMAKE_OPTIONS = "" + +# Append any arguments pass in on the command line +If($ARG_CMAKE_OPTIONS -ne "") { + $CMAKE_OPTIONS += " $ARG_CMAKE_OPTIONS" +} + +configure_and_build_preset "NVBench" "$PRESET" "$CMAKE_OPTIONS" + +If($CURRENT_PATH -ne "ci") { + popd +} diff --git a/ci/windows/test_nvbench.ps1 b/ci/windows/test_nvbench.ps1 new file mode 100644 index 00000000..4ee5106b --- /dev/null +++ b/ci/windows/test_nvbench.ps1 @@ -0,0 +1,31 @@ + +Param( + [Parameter(Mandatory = $false)] + [Alias("cmake-options")] + [ValidateNotNullOrEmpty()] + [string]$ARG_CMAKE_OPTIONS = "" +) + +$CURRENT_PATH = Split-Path $pwd -leaf +If($CURRENT_PATH -ne "ci") { + Write-Host "Moving to ci folder" + pushd "$PSScriptRoot/.." +} + +Remove-Module -Name build_common +Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList 17 + +$PRESET = "nvbench-ci" +$CMAKE_OPTIONS = "" + +# Append any arguments pass in on the command line +If($ARG_CMAKE_OPTIONS -ne "") { + $CMAKE_OPTIONS += " $ARG_CMAKE_OPTIONS" +} + +configure_and_build_preset "NVBench" "$PRESET" "$CMAKE_OPTIONS" +test_preset "NVBench" "$PRESET" + +If($CURRENT_PATH -ne "ci") { + popd +} diff --git a/cmake/DetectSupportedStandards.cmake b/cmake/DetectSupportedStandards.cmake new file mode 100644 index 00000000..6a86d6ac --- /dev/null +++ b/cmake/DetectSupportedStandards.cmake @@ -0,0 +1,65 @@ +# Detect the langauge standards supported by the current compilers. +# +# Usage: detect_supported_cxx_standards( ) +# +# - var_prefix: Used to name result variables, +# e.g. ${var_prefix}_${lang}_XX_SUPPORTED will be TRUE or FALSE. Defined for +# each XX in ${standards}. +# - lang: The language to test: C, CXX, or CUDA. +# - standards: List of any standard versions. +# +# Example: detect_supported_standards(PROJ CXX 11 14 17) +# - Sets the following variables in the parent scope to TRUE or FALSE: +# - PROJ_CXX_11_SUPPORTED +# - PROJ_CXX_14_SUPPORTED +# - PROJ_CXX_17_SUPPORTED +# - Sets `PROJ_DETECTED_CXX_STANDARDS` to a list of supported standards (e.g. "11;14;17"). +function(detect_supported_standards prefix lang) + string(TOLOWER "${lang}_std" feature_prefix) + set(all_stds) + foreach(standard IN LISTS ARGN) + set(var_name "${prefix}_${lang}_${standard}_SUPPORTED") + if ("${feature_prefix}_${standard}" IN_LIST CMAKE_${lang}_COMPILE_FEATURES) + set(${var_name} TRUE) + else() + set(${var_name} FALSE) + endif() + + # Special cases: + if (standard EQUAL 17 AND + (lang STREQUAL "CXX" OR lang STREQUAL "CUDA") AND + ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND + CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7) OR + (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND + CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8))) + # gcc < 7 and clang < 8 don't fully support C++17. + # They accept the flag and have partial support, but nvcc will refuse + # to enable it and falls back to the default dialect for the current + # CXX compiler version. This breaks our CI. + # CMake's COMPILE_FEATURES var reports that these compilers support C++17, + # but we can't rely on it, so manually disable the dialect in these cases. + set(${var_name} FALSE) + endif() + + if (standard EQUAL 20 AND + (lang STREQUAL "CXX" OR lang STREQUAL "CUDA") AND + ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND + CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10) OR + (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND + CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10) OR + (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND + CMAKE_CXX_COMPILER_VERSION VERSION_LESS 1930))) + # Similar to the above, but for C++20. + set(${var_name} FALSE) + endif() + + if (${var_name}) + list(APPEND all_stds ${standard}) + endif() + + message(STATUS "Testing ${lang}${standard} Support: ${${var_name}}") + set(${var_name} ${${var_name}} PARENT_SCOPE) + endforeach() + + set(${prefix}_DETECTED_${lang}_STANDARDS "${all_stds}" PARENT_SCOPE) +endfunction() diff --git a/cmake/NVBenchClangdCompileInfo.cmake b/cmake/NVBenchClangdCompileInfo.cmake new file mode 100644 index 00000000..a4b9c5e7 --- /dev/null +++ b/cmake/NVBenchClangdCompileInfo.cmake @@ -0,0 +1,28 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Tell cmake to generate a json file of compile commands for clangd: +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +# Symlink the compile command output to the source dir, where clangd will find it. +set(compile_commands_file "${CMAKE_BINARY_DIR}/compile_commands.json") +set(compile_commands_link "${CMAKE_SOURCE_DIR}/compile_commands.json") +message(STATUS "Creating symlink from ${compile_commands_link} to ${compile_commands_file}...") +nvbench_execute_non_fatal_process(COMMAND + "${CMAKE_COMMAND}" -E rm -f "${compile_commands_link}") +nvbench_execute_non_fatal_process(COMMAND + "${CMAKE_COMMAND}" -E touch "${compile_commands_file}") +nvbench_execute_non_fatal_process(COMMAND + "${CMAKE_COMMAND}" -E create_symlink "${compile_commands_file}" "${compile_commands_link}") diff --git a/cmake/NVBenchConfigTarget.cmake b/cmake/NVBenchConfigTarget.cmake index ebb6e4d4..bef95fcf 100644 --- a/cmake/NVBenchConfigTarget.cmake +++ b/cmake/NVBenchConfigTarget.cmake @@ -29,7 +29,6 @@ function(nvbench_add_cxx_flag target_name type flag) target_compile_options(${target_name} ${type} $<$:${flag}> $<$:-Xcompiler=${flag}> - # FIXME nvc++ case ) endif() endfunction() @@ -57,14 +56,15 @@ else() nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wunused-parameter") nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wvla") nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wgnu") + nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wno-gnu-line-marker") # WAR 3916341 if (NVBench_ENABLE_WERROR) nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Werror") endif() endif() -# GCC-specific flags -if (CMAKE_CXX_COMPILER_ID STREQUAL GNU) +# Experimental filesystem library +if (CMAKE_CXX_COMPILER_ID STREQUAL GNU OR CMAKE_CXX_COMPILER_ID STREQUAL Clang) target_link_libraries(nvbench.build_interface INTERFACE stdc++fs) endif() diff --git a/cmake/NVBenchDependencies.cmake b/cmake/NVBenchDependencies.cmake index 5496b8fc..8ba07fe6 100644 --- a/cmake/NVBenchDependencies.cmake +++ b/cmake/NVBenchDependencies.cmake @@ -1,52 +1,52 @@ ################################################################################ # fmtlib/fmt -rapids_cpm_find(fmt 7.1.3 +include("${rapids-cmake-dir}/cpm/fmt.cmake") + +if(NOT BUILD_SHARED_LIBS AND NVBench_ENABLE_INSTALL_RULES) +set(export_set_details BUILD_EXPORT_SET nvbench-targets + INSTALL_EXPORT_SET nvbench-targets) +endif() + +rapids_cpm_fmt(${export_set_details} CPM_ARGS - GITHUB_REPOSITORY fmtlib/fmt - GIT_TAG 7.1.3 - GIT_SHALLOW TRUE OPTIONS # Force static to keep fmt internal. "BUILD_SHARED_LIBS OFF" - "CMAKE_POSITION_INDEPENDENT_CODE ON" ) +if(NOT fmt_ADDED) + set(fmt_is_external TRUE) +endif() + ################################################################################ # nlohmann/json # # Following recipe from # http://github.com/cpm-cmake/CPM.cmake/blob/master/examples/json/CMakeLists.txt # Download the zips because the repo takes an excessively long time to clone. -rapids_cpm_find(nlohmann_json 3.9.1 - # Release: +rapids_cpm_find(nlohmann_json 3.11.3 CPM_ARGS - URL https://github.com/nlohmann/json/releases/download/v3.9.1/include.zip - URL_HASH SHA256=6bea5877b1541d353bd77bdfbdb2696333ae5ed8f9e8cc22df657192218cad91 - PATCH_COMMAND - # Work around compiler bug in nvcc 11.0, see NVIDIA/NVBench#18 - ${CMAKE_COMMAND} -E copy - "${CMAKE_CURRENT_SOURCE_DIR}/cmake/patches/nlohmann_json.hpp" - "./include/nlohmann/json.hpp" - - # Development version: - # I'm waiting for https://github.com/nlohmann/json/issues/2676 to be fixed, - # leave this in to simplify testing patches as they come out. Update the - # `nvbench_json` target too when switching branches. - # CPM_ARGS - # VERSION develop - # URL https://github.com/nlohmann/json/archive/refs/heads/develop.zip - # OPTIONS JSON_MultipleHeaders ON + URL https://github.com/nlohmann/json/releases/download/v3.11.3/include.zip + URL_HASH SHA256=a22461d13119ac5c78f205d3df1db13403e58ce1bb1794edc9313677313f4a9d + PATCH_COMMAND + ${CMAKE_COMMAND} + -D "CUDA_VERSION=${CMAKE_CUDA_COMPILER_VERSION}" + -D "CXX_VERSION=${CMAKE_CXX_COMPILER_VERSION}" + -D "CXX_ID=${CMAKE_CXX_COMPILER_ID}" + -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/patches/json_unordered_map_ice.cmake" ) -# nlohmann_json release headers add_library(nvbench_json INTERFACE IMPORTED) -target_include_directories(nvbench_json SYSTEM INTERFACE - "${nlohmann_json_SOURCE_DIR}/include" -) - -# nlohmann_json development branch: -#add_library(nvbench_json INTERFACE) -#target_link_libraries(nvbench_json INTERFACE nlohmann_json) +if (TARGET nlohmann_json::nlohmann_json) + # If we have a target, just use it. Cannot be an ALIAS library because + # nlohmann_json::nlohmann_json itself might be one. + target_link_libraries(nvbench_json INTERFACE nlohmann_json::nlohmann_json) +else() + # Otherwise we only downloaded the headers. + target_include_directories(nvbench_json SYSTEM INTERFACE + "${nlohmann_json_SOURCE_DIR}/include" + ) +endif() ################################################################################ # CUDAToolkit diff --git a/cmake/NVBenchDependentDlls.cmake b/cmake/NVBenchDependentDlls.cmake index bd9270d6..1a51c873 100644 --- a/cmake/NVBenchDependentDlls.cmake +++ b/cmake/NVBenchDependentDlls.cmake @@ -12,14 +12,6 @@ else() set(NVBench_ADD_DEPENDENT_DLLS_TO_BUILD OFF) endif() -if (NVBench_ADD_DEPENDENT_DLLS_TO_BUILD) - message(STATUS - "CMake 3.21.0 is required when NVBench_ADD_DEPENDENT_DLLS_TO_BUILD " - "is enabled." - ) - cmake_minimum_required(VERSION 3.21.0) -endif() - function(nvbench_setup_dep_dlls target_name) # The custom command below fails when there aren't any runtime DLLs to copy, # so only enable it when a relevant dependency is enabled: diff --git a/cmake/NVBenchExports.cmake b/cmake/NVBenchExports.cmake index ef96acd9..cb32bf88 100644 --- a/cmake/NVBenchExports.cmake +++ b/cmake/NVBenchExports.cmake @@ -1,37 +1,51 @@ macro(nvbench_generate_exports) - set(nvbench_build_export_code_block "") - set(nvbench_install_export_code_block "") + if(NVBench_ENABLE_INSTALL_RULES) + set(nvbench_build_export_code_block "") + set(nvbench_install_export_code_block "") - if (NVBench_ENABLE_NVML) - string(APPEND nvbench_build_export_code_block - "include(\"${NVBench_SOURCE_DIR}/cmake/NVBenchNVML.cmake\")\n" - ) - string(APPEND nvbench_install_export_code_block - "include(\"\${CMAKE_CURRENT_LIST_DIR}/NVBenchNVML.cmake\")\n" - ) - endif() + if (NVBench_ENABLE_NVML) + string(APPEND nvbench_build_export_code_block + "include(\"${NVBench_SOURCE_DIR}/cmake/NVBenchNVML.cmake\")\n" + ) + string(APPEND nvbench_install_export_code_block + "include(\"\${CMAKE_CURRENT_LIST_DIR}/NVBenchNVML.cmake\")\n" + ) + endif() - if (NVBench_ENABLE_CUPTI) - string(APPEND nvbench_build_export_code_block - "include(\"${NVBench_SOURCE_DIR}/cmake/NVBenchCUPTI.cmake\")\n" + if (NVBench_ENABLE_CUPTI) + string(APPEND nvbench_build_export_code_block + "include(\"${NVBench_SOURCE_DIR}/cmake/NVBenchCUPTI.cmake\")\n" + ) + string(APPEND nvbench_install_export_code_block + "include(\"\${CMAKE_CURRENT_LIST_DIR}/NVBenchCUPTI.cmake\")\n" + ) + endif() + + if (TARGET nvbench_json) + set(nvbench_json_code_block + [=[ + add_library(nvbench_json INTERFACE IMPORTED) + if (TARGET nlohmann_json::nlohmann_json) + target_link_libraries(nvbench_json INTERFACE nlohmann_json::nlohmann_json) + endif() + ]=]) + string(APPEND nvbench_build_export_code_block ${nvbench_json_code_block}) + string(APPEND nvbench_install_export_code_block ${nvbench_json_code_block}) + endif() + + rapids_export(BUILD NVBench + EXPORT_SET nvbench-targets + NAMESPACE "nvbench::" + GLOBAL_TARGETS nvbench main ctl internal_build_interface + LANGUAGES CUDA CXX + FINAL_CODE_BLOCK nvbench_build_export_code_block ) - string(APPEND nvbench_install_export_code_block - "include(\"\${CMAKE_CURRENT_LIST_DIR}/NVBenchCUPTI.cmake\")\n" + rapids_export(INSTALL NVBench + EXPORT_SET nvbench-targets + NAMESPACE "nvbench::" + GLOBAL_TARGETS nvbench main ctl internal_build_interface + LANGUAGES CUDA CXX + FINAL_CODE_BLOCK nvbench_install_export_code_block ) endif() - - rapids_export(BUILD NVBench - EXPORT_SET nvbench-targets - NAMESPACE "nvbench::" - GLOBAL_TARGETS nvbench main ctl internal_build_interface - LANGUAGES CUDA CXX - FINAL_CODE_BLOCK nvbench_build_export_code_block - ) - rapids_export(INSTALL NVBench - EXPORT_SET nvbench-targets - NAMESPACE "nvbench::" - GLOBAL_TARGETS nvbench main ctl internal_build_interface - LANGUAGES CUDA CXX - FINAL_CODE_BLOCK nvbench_install_export_code_block - ) endmacro() diff --git a/cmake/NVBenchHeaderTesting.cmake b/cmake/NVBenchHeaderTesting.cmake new file mode 100644 index 00000000..354ec84d --- /dev/null +++ b/cmake/NVBenchHeaderTesting.cmake @@ -0,0 +1,40 @@ +# For every public header, build a translation unit containing `#include
` +# with some various checks. + +set(excluded_headers_regexes + # Should never be used externally. + "^detail" + "^internal" +) + +# Meta target for all configs' header builds: +add_custom_target(nvbench.headers.all) +add_dependencies(nvbench.all nvbench.headers.all) + +file(GLOB_RECURSE header_files + RELATIVE "${NVBench_SOURCE_DIR}/nvbench/" + CONFIGURE_DEPENDS + "${NVBench_SOURCE_DIR}/nvbench/*.cuh" +) + +foreach (exclusion IN LISTS excluded_headers_regexes) + list(FILTER header_files EXCLUDE REGEX "${exclusion}") +endforeach() + +function (nvbench_add_header_target target_name cuda_std) + foreach (header IN LISTS header_files) + set(headertest_src "headers/${target_name}/${header}.cu") + set(header_str "nvbench/${header}") # Substitution used by configure_file: + configure_file("${NVBench_SOURCE_DIR}/cmake/header_test.in.cxx" "${headertest_src}") + list(APPEND headertest_srcs "${headertest_src}") + endforeach() + + add_library(${target_name} OBJECT ${headertest_srcs}) + target_link_libraries(${target_name} PUBLIC nvbench::nvbench) + set_target_properties(${target_name} PROPERTIES COMPILE_FEATURES cuda_std_${cuda_std}) + add_dependencies(nvbench.headers.all ${target_name}) +endfunction() + +foreach (std IN LISTS NVBench_DETECTED_CUDA_STANDARDS) + nvbench_add_header_target(nvbench.headers.cpp${std} ${std}) +endforeach() diff --git a/cmake/NVBenchInstallRules.cmake b/cmake/NVBenchInstallRules.cmake index 77bc9ff4..16e9b7e6 100644 --- a/cmake/NVBenchInstallRules.cmake +++ b/cmake/NVBenchInstallRules.cmake @@ -1,61 +1,69 @@ -include(GNUInstallDirs) -rapids_cmake_install_lib_dir(NVBench_INSTALL_LIB_DIR) - -# in-source public headers: -install(DIRECTORY "${NVBench_SOURCE_DIR}/nvbench" - TYPE INCLUDE - FILES_MATCHING - PATTERN "*.cuh" - PATTERN "internal" EXCLUDE -) - -# generated headers from build dir: -install( - FILES - "${NVBench_BINARY_DIR}/nvbench/config.cuh" - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/nvbench" -) -install( - FILES - "${NVBench_BINARY_DIR}/nvbench/detail/version.cuh" - "${NVBench_BINARY_DIR}/nvbench/detail/git_revision.cuh" - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/nvbench/detail" -) - -# -# Install CMake files needed by consumers to locate dependencies: -# - -# Borrowing this logic from rapids_cmake's export logic to make sure these end -# up in the same location as nvbench-config.cmake: -rapids_cmake_install_lib_dir(config_install_location) -set(config_install_location "${config_install_location}/cmake/nvbench") - -if (NVBench_ENABLE_NVML) + +if(NVBench_ENABLE_INSTALL_RULES) + + include(GNUInstallDirs) + rapids_cmake_install_lib_dir(NVBench_INSTALL_LIB_DIR) + + # in-source public headers: + install(DIRECTORY "${NVBench_SOURCE_DIR}/nvbench" + TYPE INCLUDE + FILES_MATCHING + PATTERN "*.cuh" + PATTERN "internal" EXCLUDE + ) + + # generated headers from build dir: install( FILES - "${NVBench_SOURCE_DIR}/cmake/NVBenchNVML.cmake" - DESTINATION "${config_install_location}" + "${NVBench_BINARY_DIR}/nvbench/config.cuh" + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/nvbench" ) -endif() - -if (NVBench_ENABLE_CUPTI) install( FILES - "${NVBench_SOURCE_DIR}/cmake/NVBenchCUPTI.cmake" - DESTINATION "${config_install_location}" + "${NVBench_BINARY_DIR}/nvbench/detail/version.cuh" + "${NVBench_BINARY_DIR}/nvbench/detail/git_revision.cuh" + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/nvbench/detail" ) + + # + # Install CMake files needed by consumers to locate dependencies: + # + + # Borrowing this logic from rapids_cmake's export logic to make sure these end + # up in the same location as nvbench-config.cmake: + rapids_cmake_install_lib_dir(config_install_location) + set(config_install_location "${config_install_location}/cmake/nvbench") + + if (NVBench_ENABLE_NVML) + install( + FILES + "${NVBench_SOURCE_DIR}/cmake/NVBenchNVML.cmake" + DESTINATION "${config_install_location}" + ) + endif() + + if (NVBench_ENABLE_CUPTI) + install( + FILES + "${NVBench_SOURCE_DIR}/cmake/NVBenchCUPTI.cmake" + DESTINATION "${config_install_location}" + ) + endif() endif() # Call with a list of library targets to generate install rules: function(nvbench_install_libraries) - install(TARGETS ${ARGN} - DESTINATION "${NVBench_INSTALL_LIB_DIR}" - EXPORT nvbench-targets - ) + if(NVBench_ENABLE_INSTALL_RULES) + install(TARGETS ${ARGN} + DESTINATION "${NVBench_INSTALL_LIB_DIR}" + EXPORT nvbench-targets + ) + endif() endfunction() # Call with a list of executables to generate install rules: function(nvbench_install_executables) - install(TARGETS ${ARGN} EXPORT nvbench-targets) + if(NVBench_ENABLE_INSTALL_RULES) + install(TARGETS ${ARGN} EXPORT nvbench-targets) + endif() endfunction() diff --git a/cmake/NVBenchNVML.cmake b/cmake/NVBenchNVML.cmake index f2aadbbe..4b005f3c 100644 --- a/cmake/NVBenchNVML.cmake +++ b/cmake/NVBenchNVML.cmake @@ -1,37 +1,43 @@ -# Since this file is installed, we need to make sure that the CUDAToolkit has -# been found by consumers: -if (NOT TARGET CUDA::toolkit) - find_package(CUDAToolkit REQUIRED) -endif() - -if (WIN32) - # The CUDA:: targets currently don't provide dll locations through the - # `IMPORTED_LOCATION` property, nor are they marked as `SHARED` libraries - # (they're currently `UNKNOWN`). This prevents the `nvbench_setup_dep_dlls` - # CMake function from copying the dlls to the build / install directories. - # This is discussed in https://gitlab.kitware.com/cmake/cmake/-/issues/22845 - # and the other CMake issues it links to. - # - # We create a nvbench-specific target that configures the nvml interface as - # described here: - # https://gitlab.kitware.com/cmake/cmake/-/issues/22845#note_1077538 - # - # Use find_file instead of find_library, which would search for a .lib file. - # This is also nice because find_file searches recursively (find_library - # does not) and some versions of CTK nest nvml.dll several directories deep - # under C:\Windows\System32. - find_file(NVBench_NVML_DLL nvml.dll REQUIRED - DOC "The full path to nvml.dll. Usually somewhere under C:/Windows/System32." - PATHS "C:/Windows/System32" - ) - mark_as_advanced(NVBench_NVML_DLL) - add_library(nvbench::nvml SHARED IMPORTED) - target_link_libraries(nvbench::nvml INTERFACE CUDA::toolkit) - set_target_properties(nvbench::nvml PROPERTIES - IMPORTED_LOCATION "${NVBench_NVML_DLL}" - IMPORTED_IMPLIB "${CUDA_nvml_LIBRARY}" - ) -else() - # Linux is much easier... - add_library(nvbench::nvml ALIAS CUDA::nvml) -endif() +# Since this file is installed, we need to make sure that the CUDAToolkit has +# been found by consumers: +if (NOT TARGET CUDA::toolkit) + find_package(CUDAToolkit REQUIRED) +endif() + +if (WIN32) + # The CUDA:: targets currently don't provide dll locations through the + # `IMPORTED_LOCATION` property, nor are they marked as `SHARED` libraries + # (they're currently `UNKNOWN`). This prevents the `nvbench_setup_dep_dlls` + # CMake function from copying the dlls to the build / install directories. + # This is discussed in https://gitlab.kitware.com/cmake/cmake/-/issues/22845 + # and the other CMake issues it links to. + # + # We create a nvbench-specific target that configures the nvml interface as + # described here: + # https://gitlab.kitware.com/cmake/cmake/-/issues/22845#note_1077538 + # + # Use find_file instead of find_library, which would search for a .lib file. + # This is also nice because find_file searches recursively (find_library + # does not) and some versions of CTK nest nvml.dll several directories deep + # under C:\Windows\System32. + find_file(NVBench_NVML_DLL nvml.dll + DOC "The full path to nvml.dll. Usually somewhere under C:/Windows/System32." + PATHS "C:/Windows/System32" + ) + mark_as_advanced(NVBench_NVML_DLL) +endif() + +if (NVBench_NVML_DLL) + add_library(nvbench::nvml SHARED IMPORTED) + target_link_libraries(nvbench::nvml INTERFACE CUDA::toolkit) + set_target_properties(nvbench::nvml PROPERTIES + IMPORTED_LOCATION "${NVBench_NVML_DLL}" + IMPORTED_IMPLIB "${CUDA_nvml_LIBRARY}" + ) +elseif(TARGET CUDA::nvml) + add_library(nvbench::nvml ALIAS CUDA::nvml) +else() + message(FATAL_ERROR "Could not find nvml.dll or CUDA::nvml target. " + "Set -DNVBench_ENABLE_NVML=OFF to disable NVML support " + "or set -DNVBench_NVML_DLL to the full path to nvml.dll on Windows.") +endif() diff --git a/cmake/NVBenchRapidsCMake.cmake b/cmake/NVBenchRapidsCMake.cmake index 5c09d302..b110ccc5 100644 --- a/cmake/NVBenchRapidsCMake.cmake +++ b/cmake/NVBenchRapidsCMake.cmake @@ -1,10 +1,12 @@ # Called before project(...) macro(nvbench_load_rapids_cmake) - file(DOWNLOAD - https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-21.12/RAPIDS.cmake - "${CMAKE_BINARY_DIR}/RAPIDS.cmake" - ) - include("${CMAKE_BINARY_DIR}/RAPIDS.cmake") + if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/NVBENCH_RAPIDS.cmake") + file(DOWNLOAD + https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.12/RAPIDS.cmake + "${CMAKE_CURRENT_BINARY_DIR}/NVBENCH_RAPIDS.cmake" + ) + endif() + include("${CMAKE_CURRENT_BINARY_DIR}/NVBENCH_RAPIDS.cmake") include(rapids-cmake) include(rapids-cpm) @@ -19,9 +21,5 @@ endmacro() macro(nvbench_init_rapids_cmake) rapids_cmake_build_type(Release) rapids_cmake_write_version_file("${NVBench_BINARY_DIR}/nvbench/detail/version.cuh") - rapids_cmake_write_git_revision_file( - nvbench_git_revision - "${NVBench_BINARY_DIR}/nvbench/detail/git_revision.cuh" - ) rapids_cpm_init() endmacro() diff --git a/cmake/NVBenchUtilities.cmake b/cmake/NVBenchUtilities.cmake index 36684203..caa79b8b 100644 --- a/cmake/NVBenchUtilities.cmake +++ b/cmake/NVBenchUtilities.cmake @@ -1,3 +1,48 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Passes all args directly to execute_process while setting up the following +# results variables and propogating them to the caller's scope: +# +# - nvbench_process_exit_code +# - nvbench_process_stdout +# - nvbench_process_stderr +# +# If the command is not successful (e.g. the last command does not return zero), +# a non-fatal warning is printed. +function(nvbench_execute_non_fatal_process) + execute_process(${ARGN} + RESULT_VARIABLE nvbench_process_exit_code + OUTPUT_VARIABLE nvbench_process_stdout + ERROR_VARIABLE nvbench_process_stderr + ) + + if (NOT nvbench_process_exit_code EQUAL 0) + message(WARNING + "execute_process failed with non-zero exit code: ${nvbench_process_exit_code}\n" + "${ARGN}\n" + "stdout:\n${nvbench_process_stdout}\n" + "stderr:\n${nvbench_process_stderr}\n" + ) + endif() + + set(nvbench_process_exit_code "${nvbench_process_exit_code}" PARENT_SCOPE) + set(nvbench_process_stdout "${nvbench_process_stdout}" PARENT_SCOPE) + set(nvbench_process_stderr "${nvbench_process_stderr}" PARENT_SCOPE) +endfunction() + # Writes CMAKE_CUDA_ARCHITECTURES to out_var, but using escaped semicolons # as delimiters function(nvbench_escaped_cuda_arches out_var) diff --git a/cmake/PrintCTestRunTimes.cmake b/cmake/PrintCTestRunTimes.cmake new file mode 100644 index 00000000..f4ac7d90 --- /dev/null +++ b/cmake/PrintCTestRunTimes.cmake @@ -0,0 +1,127 @@ +## This CMake script parses the output of ctest and prints a formatted list +## of individual test runtimes, sorted longest first. +## +## ctest > ctest_log +## cmake -DLOGFILE=ctest_log \ +## -DMINSEC=10 \ +## -P PrintCTestRunTimes.cmake +## +################################################################################ + +cmake_minimum_required(VERSION 3.15) + +# Prepend the string with "0" until the string length equals the specified width +function(pad_string_with_zeros string_var width) + set(local_string "${${string_var}}") + string(LENGTH "${local_string}" size) + while(size LESS width) + string(PREPEND local_string "0") + string(LENGTH "${local_string}" size) + endwhile() + set(${string_var} "${local_string}" PARENT_SCOPE) +endfunction() + +################################################################################ + +if (NOT LOGFILE) + message(FATAL_ERROR "Missing -DLOGFILE= argument.") +endif() + +if (NOT DEFINED MINSEC) + set(MINSEC 10) +endif() + +set(num_below_thresh 0) + +# Check if logfile exists +if (NOT EXISTS "${LOGFILE}") + message(FATAL_ERROR "LOGFILE does not exist ('${LOGFILE}').") +endif() + +string(JOIN "" regex + "[0-9]+/[0-9]+[ ]+Test[ ]+#" + "([0-9]+)" # Test ID + ":[ ]+" + "([^ ]+)" # Test Name + "[ ]*\\.+[ ]*\\**[ ]*" + "([^ ]+)" # Result + "[ ]+" + "([0-9]+)" # Seconds + "\\.[0-9]+[ ]+sec" +) + +message(DEBUG "LOGFILE: ${LOGFILE}") +message(DEBUG "MINSEC: ${MINSEC}") +message(DEBUG "regex: ${regex}") + +# Read the logfile and generate a map / keylist +set(keys) +file(STRINGS "${LOGFILE}" lines) +foreach(line ${lines}) + + # Parse each build time + string(REGEX MATCH "${regex}" _DUMMY "${line}") + + if (CMAKE_MATCH_COUNT EQUAL 4) + set(test_id "${CMAKE_MATCH_1}") + set(test_name "${CMAKE_MATCH_2}") + set(test_result "${CMAKE_MATCH_3}") + set(tmp "${CMAKE_MATCH_4}") # floor(runtime_seconds) + + if (tmp LESS MINSEC) + math(EXPR num_below_thresh "${num_below_thresh} + 1") + continue() + endif() + + # Compute human readable time + math(EXPR days "${tmp} / (60 * 60 * 24)") + math(EXPR tmp "${tmp} - (${days} * 60 * 60 * 24)") + math(EXPR hours "${tmp} / (60 * 60)") + math(EXPR tmp "${tmp} - (${hours} * 60 * 60)") + math(EXPR minutes "${tmp} / (60)") + math(EXPR tmp "${tmp} - (${minutes} * 60)") + math(EXPR seconds "${tmp}") + + # Format time components + pad_string_with_zeros(days 3) + pad_string_with_zeros(hours 2) + pad_string_with_zeros(minutes 2) + pad_string_with_zeros(seconds 2) + + # Construct table entry + # Later values in the file for the same command overwrite earlier entries + string(MAKE_C_IDENTIFIER "${test_id}" key) + string(JOIN " | " ENTRY_${key} + "${days}d ${hours}h ${minutes}m ${seconds}s" + "${test_result}" + "${test_id}: ${test_name}" + ) + + # Record the key: + list(APPEND keys "${key}") + endif() +endforeach() + +list(REMOVE_DUPLICATES keys) + +# Build the entry list: +set(entries) +foreach(key ${keys}) + list(APPEND entries "${ENTRY_${key}}") +endforeach() + +if (NOT entries) + message(STATUS "LOGFILE contained no test times ('${LOGFILE}').") +endif() + +# Sort in descending order: +list(SORT entries ORDER DESCENDING) + +# Dump table: +foreach(entry ${entries}) + message(STATUS ${entry}) +endforeach() + +if (num_below_thresh GREATER 0) + message(STATUS "${num_below_thresh} additional tests took < ${MINSEC}s each.") +endif() diff --git a/cmake/PrintNinjaBuildTimes.cmake b/cmake/PrintNinjaBuildTimes.cmake new file mode 100644 index 00000000..65d243d3 --- /dev/null +++ b/cmake/PrintNinjaBuildTimes.cmake @@ -0,0 +1,101 @@ +## This CMake script parses a .ninja_log file (LOGFILE) and prints a list of +## build/link times, sorted longest first. +## +## cmake -DLOGFILE=<.ninja_log file> \ +## -P PrintNinjaBuildTimes.cmake +## +## If LOGFILE is omitted, the current directory's .ninja_log file is used. +################################################################################ + +cmake_minimum_required(VERSION 3.15) + +# Prepend the string with "0" until the string length equals the specified width +function(pad_string_with_zeros string_var width) + set(local_string "${${string_var}}") + string(LENGTH "${local_string}" size) + while(size LESS width) + string(PREPEND local_string "0") + string(LENGTH "${local_string}" size) + endwhile() + set(${string_var} "${local_string}" PARENT_SCOPE) +endfunction() + +################################################################################ + +if (NOT LOGFILE) + set(LOGFILE ".ninja_log") +endif() + +# Check if logfile exists +if (NOT EXISTS "${LOGFILE}") + message(FATAL_ERROR "LOGFILE does not exist ('${LOGFILE}').") +endif() + +# Read the logfile and generate a map / keylist +set(keys) +file(STRINGS "${LOGFILE}" lines) +foreach(line ${lines}) + + # Parse each build time + string(REGEX MATCH + "^([0-9]+)\t([0-9]+)\t[0-9]+\t([^\t]+)+\t[0-9a-fA-F]+$" _DUMMY "${line}") + + if (CMAKE_MATCH_COUNT EQUAL 3) + set(start_ms ${CMAKE_MATCH_1}) + set(end_ms ${CMAKE_MATCH_2}) + set(command "${CMAKE_MATCH_3}") + math(EXPR runtime_ms "${end_ms} - ${start_ms}") + + # Compute human readable time + math(EXPR days "${runtime_ms} / (1000 * 60 * 60 * 24)") + math(EXPR runtime_ms "${runtime_ms} - (${days} * 1000 * 60 * 60 * 24)") + math(EXPR hours "${runtime_ms} / (1000 * 60 * 60)") + math(EXPR runtime_ms "${runtime_ms} - (${hours} * 1000 * 60 * 60)") + math(EXPR minutes "${runtime_ms} / (1000 * 60)") + math(EXPR runtime_ms "${runtime_ms} - (${minutes} * 1000 * 60)") + math(EXPR seconds "${runtime_ms} / 1000") + math(EXPR milliseconds "${runtime_ms} - (${seconds} * 1000)") + + # Format time components + pad_string_with_zeros(days 3) + pad_string_with_zeros(hours 2) + pad_string_with_zeros(minutes 2) + pad_string_with_zeros(seconds 2) + pad_string_with_zeros(milliseconds 3) + + # Construct table entry + # Later values in the file for the same command overwrite earlier entries + string(MAKE_C_IDENTIFIER "${command}" key) + set(ENTRY_${key} + "${days}d ${hours}h ${minutes}m ${seconds}s ${milliseconds}ms | ${command}" + ) + + # Record the key: + list(APPEND keys "${key}") + endif() +endforeach() + +list(REMOVE_DUPLICATES keys) + +# Build the entry list: +set(entries) +foreach(key ${keys}) + list(APPEND entries "${ENTRY_${key}}") +endforeach() + +if (NOT entries) + message(FATAL_ERROR "LOGFILE contained no build entries ('${LOGFILE}').") +endif() + +# Sort in descending order: +list(SORT entries) +list(REVERSE entries) + +# Dump table: +message(STATUS "-----------------------+----------------------------") +message(STATUS "Time | Command ") +message(STATUS "-----------------------+----------------------------") + +foreach(entry ${entries}) + message(STATUS ${entry}) +endforeach() diff --git a/cmake/header_test.in.cxx b/cmake/header_test.in.cxx new file mode 100644 index 00000000..c26753e1 --- /dev/null +++ b/cmake/header_test.in.cxx @@ -0,0 +1,57 @@ +// This source file checks that: +// 1) Header <${header_str}> compiles without error. +// 2) Common macro collisions with platform/system headers are avoided. + +// Turn off failures for certain configurations: +#ifndef NVBench_IGNORE_MACRO_CHECKS + +// Define NVBench_MACRO_CHECK(macro, header), which emits a diagnostic indicating +// a potential macro collision and halts. +// +// Hacky way to build a string, but it works on all tested platforms. +#define NVBench_MACRO_CHECK(MACRO, HEADER) \ + NVBench_MACRO_CHECK_IMPL(Identifier MACRO should not be used from NVBench \ + headers due to conflicts with HEADER macros.) + +// Use raw platform checks instead of the NVBench_HOST_COMPILER macros since we +// don't want to #include any headers other than the one being tested. +// +// This is only implemented for MSVC/GCC/Clang. +#if defined(_MSC_VER) // MSVC + +// Fake up an error for MSVC +#define NVBench_MACRO_CHECK_IMPL(msg) \ + /* Print message that looks like an error: */ \ + __pragma(message(__FILE__ ":" NVBench_MACRO_CHECK_IMPL0(__LINE__) \ + ": error: " #msg)) \ + /* abort compilation due to static_assert or syntax error: */ \ + static_assert(false, #msg); +#define NVBench_MACRO_CHECK_IMPL0(x) NVBench_MACRO_CHECK_IMPL1(x) +#define NVBench_MACRO_CHECK_IMPL1(x) #x + +#elif defined(__clang__) || defined(__GNUC__) + +// GCC/clang are easy: +#define NVBench_MACRO_CHECK_IMPL(msg) NVBench_MACRO_CHECK_IMPL0(GCC error #msg) +#define NVBench_MACRO_CHECK_IMPL0(expr) _Pragma(#expr) + +#endif + +// complex.h conflicts +#define I NVBench_MACRO_CHECK('I', complex.h) + +// windows.h conflicts +#define small NVBench_MACRO_CHECK('small', windows.h) +// We can't enable these checks without breaking some builds -- some standard +// library implementations unconditionally `#undef` these macros, which then +// causes random failures later. +// Leaving these commented out as a warning: Here be dragons. +//#define min(...) NVBench_MACRO_CHECK('min', windows.h) +//#define max(...) NVBench_MACRO_CHECK('max', windows.h) + +// termios.h conflicts (NVIDIA/thrust#1547) +#define B0 NVBench_MACRO_CHECK("B0", termios.h) + +#endif // NVBench_IGNORE_MACRO_CHECKS + +#include <${header_str}> diff --git a/cmake/patches/json_unordered_map_ice.cmake b/cmake/patches/json_unordered_map_ice.cmake new file mode 100644 index 00000000..44f37c3b --- /dev/null +++ b/cmake/patches/json_unordered_map_ice.cmake @@ -0,0 +1,22 @@ +# NVCC 11.1 and GCC 9 need a patch to build, otherwise: +# +# nlohmann/ordered_map.hpp(29): error #3316: +# Internal Compiler Error (codegen): "internal error during structure layout!" +# +# Usage: +# ${CMAKE_COMMAND} +# -D "CUDA_VERSION=${CMAKE_CUDA_COMPILER_VERSION}" +# -D "CXX_VERSION=${CMAKE_CXX_COMPILER_VERSION}" +# -D "CXX_ID=${CMAKE_CXX_COMPILER_ID}" +# -P "json_unordered_map_ice.cmake" + +if(CUDA_VERSION VERSION_GREATER 11.8 OR NOT CXX_ID STREQUAL "GNU" OR CXX_VERSION VERSION_LESS 9.0) + return() +endif() + +# Read the file and replace the string "JSON_NO_UNIQUE_ADDRESS" with +# "/* JSON_NO_UNIQUE_ADDRESS */". +file(READ "include/nlohmann/ordered_map.hpp" NLOHMANN_ORDERED_MAP_HPP) +string(REPLACE "JSON_NO_UNIQUE_ADDRESS" "/* [NVBench Patch] JSON_NO_UNIQUE_ADDRESS */" + NLOHMANN_ORDERED_MAP_HPP "${NLOHMANN_ORDERED_MAP_HPP}") +file(WRITE "include/nlohmann/ordered_map.hpp" "${NLOHMANN_ORDERED_MAP_HPP}") diff --git a/cmake/patches/nlohmann_json.hpp b/cmake/patches/nlohmann_json.hpp deleted file mode 100644 index 9a3a0ccb..00000000 --- a/cmake/patches/nlohmann_json.hpp +++ /dev/null @@ -1,8799 +0,0 @@ -/* - __ _____ _____ _____ - __| | __| | | | JSON for Modern C++ -| | |__ | | | | | | version 3.9.1 -|_____|_____|_____|_|___| https://github.com/nlohmann/json - -Licensed under the MIT License . -SPDX-License-Identifier: MIT -Copyright (c) 2013-2019 Niels Lohmann . - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -*/ - -#ifndef INCLUDE_NLOHMANN_JSON_HPP_ -#define INCLUDE_NLOHMANN_JSON_HPP_ - -#define NLOHMANN_JSON_VERSION_MAJOR 3 -#define NLOHMANN_JSON_VERSION_MINOR 9 -#define NLOHMANN_JSON_VERSION_PATCH 1 - -#include // all_of, find, for_each -#include // nullptr_t, ptrdiff_t, size_t -#include // hash, less -#include // initializer_list -#include // istream, ostream -#include // random_access_iterator_tag -#include // unique_ptr -#include // accumulate -#include // string, stoi, to_string -#include // declval, forward, move, pair, swap -#include // vector - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/*! -@brief namespace for Niels Lohmann -@see https://github.com/nlohmann -@since version 1.0.0 -*/ -namespace nlohmann -{ - -/*! -@brief a class to store JSON values - -@tparam ObjectType type for JSON objects (`std::map` by default; will be used -in @ref object_t) -@tparam ArrayType type for JSON arrays (`std::vector` by default; will be used -in @ref array_t) -@tparam StringType type for JSON strings and object keys (`std::string` by -default; will be used in @ref string_t) -@tparam BooleanType type for JSON booleans (`bool` by default; will be used -in @ref boolean_t) -@tparam NumberIntegerType type for JSON integer numbers (`int64_t` by -default; will be used in @ref number_integer_t) -@tparam NumberUnsignedType type for JSON unsigned integer numbers (@c -`uint64_t` by default; will be used in @ref number_unsigned_t) -@tparam NumberFloatType type for JSON floating-point numbers (`double` by -default; will be used in @ref number_float_t) -@tparam BinaryType type for packed binary data for compatibility with binary -serialization formats (`std::vector` by default; will be used in -@ref binary_t) -@tparam AllocatorType type of the allocator to use (`std::allocator` by -default) -@tparam JSONSerializer the serializer to resolve internal calls to `to_json()` -and `from_json()` (@ref adl_serializer by default) - -@requirement The class satisfies the following concept requirements: -- Basic - - [DefaultConstructible](https://en.cppreference.com/w/cpp/named_req/DefaultConstructible): - JSON values can be default constructed. The result will be a JSON null - value. - - [MoveConstructible](https://en.cppreference.com/w/cpp/named_req/MoveConstructible): - A JSON value can be constructed from an rvalue argument. - - [CopyConstructible](https://en.cppreference.com/w/cpp/named_req/CopyConstructible): - A JSON value can be copy-constructed from an lvalue expression. - - [MoveAssignable](https://en.cppreference.com/w/cpp/named_req/MoveAssignable): - A JSON value van be assigned from an rvalue argument. - - [CopyAssignable](https://en.cppreference.com/w/cpp/named_req/CopyAssignable): - A JSON value can be copy-assigned from an lvalue expression. - - [Destructible](https://en.cppreference.com/w/cpp/named_req/Destructible): - JSON values can be destructed. -- Layout - - [StandardLayoutType](https://en.cppreference.com/w/cpp/named_req/StandardLayoutType): - JSON values have - [standard layout](https://en.cppreference.com/w/cpp/language/data_members#Standard_layout): - All non-static data members are private and standard layout types, the - class has no virtual functions or (virtual) base classes. -- Library-wide - - [EqualityComparable](https://en.cppreference.com/w/cpp/named_req/EqualityComparable): - JSON values can be compared with `==`, see @ref - operator==(const_reference,const_reference). - - [LessThanComparable](https://en.cppreference.com/w/cpp/named_req/LessThanComparable): - JSON values can be compared with `<`, see @ref - operator<(const_reference,const_reference). - - [Swappable](https://en.cppreference.com/w/cpp/named_req/Swappable): - Any JSON lvalue or rvalue of can be swapped with any lvalue or rvalue of - other compatible types, using unqualified function call @ref swap(). - - [NullablePointer](https://en.cppreference.com/w/cpp/named_req/NullablePointer): - JSON values can be compared against `std::nullptr_t` objects which are used - to model the `null` value. -- Container - - [Container](https://en.cppreference.com/w/cpp/named_req/Container): - JSON values can be used like STL containers and provide iterator access. - - [ReversibleContainer](https://en.cppreference.com/w/cpp/named_req/ReversibleContainer); - JSON values can be used like STL containers and provide reverse iterator - access. - -@invariant The member variables @a m_value and @a m_type have the following -relationship: -- If `m_type == value_t::object`, then `m_value.object != nullptr`. -- If `m_type == value_t::array`, then `m_value.array != nullptr`. -- If `m_type == value_t::string`, then `m_value.string != nullptr`. -The invariants are checked by member function assert_invariant(). - -@internal -@note ObjectType trick from https://stackoverflow.com/a/9860911 -@endinternal - -@see [RFC 7159: The JavaScript Object Notation (JSON) Data Interchange -Format](http://rfc7159.net/rfc7159) - -@since version 1.0.0 - -@nosubgrouping -*/ -NLOHMANN_BASIC_JSON_TPL_DECLARATION -class basic_json -{ - private: - template friend struct detail::external_constructor; - friend ::nlohmann::json_pointer; - - template - friend class ::nlohmann::detail::parser; - friend ::nlohmann::detail::serializer; - template - friend class ::nlohmann::detail::iter_impl; - template - friend class ::nlohmann::detail::binary_writer; - template - friend class ::nlohmann::detail::binary_reader; - template - friend class ::nlohmann::detail::json_sax_dom_parser; - template - friend class ::nlohmann::detail::json_sax_dom_callback_parser; - - /// workaround type for MSVC - using basic_json_t = NLOHMANN_BASIC_JSON_TPL; - - // convenience aliases for types residing in namespace detail; - using lexer = ::nlohmann::detail::lexer_base; - - template - static ::nlohmann::detail::parser parser( - InputAdapterType adapter, - detail::parser_callback_tcb = nullptr, - const bool allow_exceptions = true, - const bool ignore_comments = false - ) - { - return ::nlohmann::detail::parser(std::move(adapter), - std::move(cb), allow_exceptions, ignore_comments); - } - - using primitive_iterator_t = ::nlohmann::detail::primitive_iterator_t; - template - using internal_iterator = ::nlohmann::detail::internal_iterator; - template - using iter_impl = ::nlohmann::detail::iter_impl; - template - using iteration_proxy = ::nlohmann::detail::iteration_proxy; - template using json_reverse_iterator = ::nlohmann::detail::json_reverse_iterator; - - template - using output_adapter_t = ::nlohmann::detail::output_adapter_t; - - template - using binary_reader = ::nlohmann::detail::binary_reader; - template using binary_writer = ::nlohmann::detail::binary_writer; - - using serializer = ::nlohmann::detail::serializer; - - public: - using value_t = detail::value_t; - /// JSON Pointer, see @ref nlohmann::json_pointer - using json_pointer = ::nlohmann::json_pointer; - template - using json_serializer = JSONSerializer; - /// how to treat decoding errors - using error_handler_t = detail::error_handler_t; - /// how to treat CBOR tags - using cbor_tag_handler_t = detail::cbor_tag_handler_t; - /// helper type for initializer lists of basic_json values - using initializer_list_t = std::initializer_list>; - - using input_format_t = detail::input_format_t; - /// SAX interface type, see @ref nlohmann::json_sax - using json_sax_t = json_sax; - - //////////////// - // exceptions // - //////////////// - - /// @name exceptions - /// Classes to implement user-defined exceptions. - /// @{ - - /// @copydoc detail::exception - using exception = detail::exception; - /// @copydoc detail::parse_error - using parse_error = detail::parse_error; - /// @copydoc detail::invalid_iterator - using invalid_iterator = detail::invalid_iterator; - /// @copydoc detail::type_error - using type_error = detail::type_error; - /// @copydoc detail::out_of_range - using out_of_range = detail::out_of_range; - /// @copydoc detail::other_error - using other_error = detail::other_error; - - /// @} - - - ///////////////////// - // container types // - ///////////////////// - - /// @name container types - /// The canonic container types to use @ref basic_json like any other STL - /// container. - /// @{ - - /// the type of elements in a basic_json container - using value_type = basic_json; - - /// the type of an element reference - using reference = value_type&; - /// the type of an element const reference - using const_reference = const value_type&; - - /// a type to represent differences between iterators - using difference_type = std::ptrdiff_t; - /// a type to represent container sizes - using size_type = std::size_t; - - /// the allocator type - using allocator_type = AllocatorType; - - /// the type of an element pointer - using pointer = typename std::allocator_traits::pointer; - /// the type of an element const pointer - using const_pointer = typename std::allocator_traits::const_pointer; - - /// an iterator for a basic_json container - using iterator = iter_impl; - /// a const iterator for a basic_json container - using const_iterator = iter_impl; - /// a reverse iterator for a basic_json container - using reverse_iterator = json_reverse_iterator; - /// a const reverse iterator for a basic_json container - using const_reverse_iterator = json_reverse_iterator; - - /// @} - - - /*! - @brief returns the allocator associated with the container - */ - static allocator_type get_allocator() - { - return allocator_type(); - } - - /*! - @brief returns version information on the library - - This function returns a JSON object with information about the library, - including the version number and information on the platform and compiler. - - @return JSON object holding version information - key | description - ----------- | --------------- - `compiler` | Information on the used compiler. It is an object with the following keys: `c++` (the used C++ standard), `family` (the compiler family; possible values are `clang`, `icc`, `gcc`, `ilecpp`, `msvc`, `pgcpp`, `sunpro`, and `unknown`), and `version` (the compiler version). - `copyright` | The copyright line for the library as string. - `name` | The name of the library as string. - `platform` | The used platform as string. Possible values are `win32`, `linux`, `apple`, `unix`, and `unknown`. - `url` | The URL of the project as string. - `version` | The version of the library. It is an object with the following keys: `major`, `minor`, and `patch` as defined by [Semantic Versioning](http://semver.org), and `string` (the version string). - - @liveexample{The following code shows an example output of the `meta()` - function.,meta} - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. - - @complexity Constant. - - @since 2.1.0 - */ - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json meta() - { - basic_json result; - - result["copyright"] = "(C) 2013-2020 Niels Lohmann"; - result["name"] = "JSON for Modern C++"; - result["url"] = "https://github.com/nlohmann/json"; - result["version"]["string"] = - std::to_string(NLOHMANN_JSON_VERSION_MAJOR) + "." + - std::to_string(NLOHMANN_JSON_VERSION_MINOR) + "." + - std::to_string(NLOHMANN_JSON_VERSION_PATCH); - result["version"]["major"] = NLOHMANN_JSON_VERSION_MAJOR; - result["version"]["minor"] = NLOHMANN_JSON_VERSION_MINOR; - result["version"]["patch"] = NLOHMANN_JSON_VERSION_PATCH; - -#ifdef _WIN32 - result["platform"] = "win32"; -#elif defined __linux__ - result["platform"] = "linux"; -#elif defined __APPLE__ - result["platform"] = "apple"; -#elif defined __unix__ - result["platform"] = "unix"; -#else - result["platform"] = "unknown"; -#endif - -#if defined(__ICC) || defined(__INTEL_COMPILER) - result["compiler"] = {{"family", "icc"}, {"version", __INTEL_COMPILER}}; -#elif defined(__clang__) - result["compiler"] = {{"family", "clang"}, {"version", __clang_version__}}; -#elif defined(__GNUC__) || defined(__GNUG__) - result["compiler"] = {{"family", "gcc"}, {"version", std::to_string(__GNUC__) + "." + std::to_string(__GNUC_MINOR__) + "." + std::to_string(__GNUC_PATCHLEVEL__)}}; -#elif defined(__HP_cc) || defined(__HP_aCC) - result["compiler"] = "hp" -#elif defined(__IBMCPP__) - result["compiler"] = {{"family", "ilecpp"}, {"version", __IBMCPP__}}; -#elif defined(_MSC_VER) - result["compiler"] = {{"family", "msvc"}, {"version", _MSC_VER}}; -#elif defined(__PGI) - result["compiler"] = {{"family", "pgcpp"}, {"version", __PGI}}; -#elif defined(__SUNPRO_CC) - result["compiler"] = {{"family", "sunpro"}, {"version", __SUNPRO_CC}}; -#else - result["compiler"] = {{"family", "unknown"}, {"version", "unknown"}}; -#endif - -#ifdef __cplusplus - result["compiler"]["c++"] = std::to_string(__cplusplus); -#else - result["compiler"]["c++"] = "unknown"; -#endif - return result; - } - - - /////////////////////////// - // JSON value data types // - /////////////////////////// - - /// @name JSON value data types - /// The data types to store a JSON value. These types are derived from - /// the template arguments passed to class @ref basic_json. - /// @{ - -#if defined(JSON_HAS_CPP_14) - // Use transparent comparator if possible, combined with perfect forwarding - // on find() and count() calls prevents unnecessary string construction. - using object_comparator_t = std::less<>; -#else - using object_comparator_t = std::less; -#endif - - /*! - @brief a type for an object - - [RFC 7159](http://rfc7159.net/rfc7159) describes JSON objects as follows: - > An object is an unordered collection of zero or more name/value pairs, - > where a name is a string and a value is a string, number, boolean, null, - > object, or array. - - To store objects in C++, a type is defined by the template parameters - described below. - - @tparam ObjectType the container to store objects (e.g., `std::map` or - `std::unordered_map`) - @tparam StringType the type of the keys or names (e.g., `std::string`). - The comparison function `std::less` is used to order elements - inside the container. - @tparam AllocatorType the allocator to use for objects (e.g., - `std::allocator`) - - #### Default type - - With the default values for @a ObjectType (`std::map`), @a StringType - (`std::string`), and @a AllocatorType (`std::allocator`), the default - value for @a object_t is: - - @code {.cpp} - std::map< - std::string, // key_type - basic_json, // value_type - std::less, // key_compare - std::allocator> // allocator_type - > - @endcode - - #### Behavior - - The choice of @a object_t influences the behavior of the JSON class. With - the default type, objects have the following behavior: - - - When all names are unique, objects will be interoperable in the sense - that all software implementations receiving that object will agree on - the name-value mappings. - - When the names within an object are not unique, it is unspecified which - one of the values for a given key will be chosen. For instance, - `{"key": 2, "key": 1}` could be equal to either `{"key": 1}` or - `{"key": 2}`. - - Internally, name/value pairs are stored in lexicographical order of the - names. Objects will also be serialized (see @ref dump) in this order. - For instance, `{"b": 1, "a": 2}` and `{"a": 2, "b": 1}` will be stored - and serialized as `{"a": 2, "b": 1}`. - - When comparing objects, the order of the name/value pairs is irrelevant. - This makes objects interoperable in the sense that they will not be - affected by these differences. For instance, `{"b": 1, "a": 2}` and - `{"a": 2, "b": 1}` will be treated as equal. - - #### Limits - - [RFC 7159](http://rfc7159.net/rfc7159) specifies: - > An implementation may set limits on the maximum depth of nesting. - - In this class, the object's limit of nesting is not explicitly constrained. - However, a maximum depth of nesting may be introduced by the compiler or - runtime environment. A theoretical limit can be queried by calling the - @ref max_size function of a JSON object. - - #### Storage - - Objects are stored as pointers in a @ref basic_json type. That is, for any - access to object values, a pointer of type `object_t*` must be - dereferenced. - - @sa @ref array_t -- type for an array value - - @since version 1.0.0 - - @note The order name/value pairs are added to the object is *not* - preserved by the library. Therefore, iterating an object may return - name/value pairs in a different order than they were originally stored. In - fact, keys will be traversed in alphabetical order as `std::map` with - `std::less` is used by default. Please note this behavior conforms to [RFC - 7159](http://rfc7159.net/rfc7159), because any order implements the - specified "unordered" nature of JSON objects. - */ - using object_t = ObjectType>>; - - /*! - @brief a type for an array - - [RFC 7159](http://rfc7159.net/rfc7159) describes JSON arrays as follows: - > An array is an ordered sequence of zero or more values. - - To store objects in C++, a type is defined by the template parameters - explained below. - - @tparam ArrayType container type to store arrays (e.g., `std::vector` or - `std::list`) - @tparam AllocatorType allocator to use for arrays (e.g., `std::allocator`) - - #### Default type - - With the default values for @a ArrayType (`std::vector`) and @a - AllocatorType (`std::allocator`), the default value for @a array_t is: - - @code {.cpp} - std::vector< - basic_json, // value_type - std::allocator // allocator_type - > - @endcode - - #### Limits - - [RFC 7159](http://rfc7159.net/rfc7159) specifies: - > An implementation may set limits on the maximum depth of nesting. - - In this class, the array's limit of nesting is not explicitly constrained. - However, a maximum depth of nesting may be introduced by the compiler or - runtime environment. A theoretical limit can be queried by calling the - @ref max_size function of a JSON array. - - #### Storage - - Arrays are stored as pointers in a @ref basic_json type. That is, for any - access to array values, a pointer of type `array_t*` must be dereferenced. - - @sa @ref object_t -- type for an object value - - @since version 1.0.0 - */ - using array_t = ArrayType>; - - /*! - @brief a type for a string - - [RFC 7159](http://rfc7159.net/rfc7159) describes JSON strings as follows: - > A string is a sequence of zero or more Unicode characters. - - To store objects in C++, a type is defined by the template parameter - described below. Unicode values are split by the JSON class into - byte-sized characters during deserialization. - - @tparam StringType the container to store strings (e.g., `std::string`). - Note this container is used for keys/names in objects, see @ref object_t. - - #### Default type - - With the default values for @a StringType (`std::string`), the default - value for @a string_t is: - - @code {.cpp} - std::string - @endcode - - #### Encoding - - Strings are stored in UTF-8 encoding. Therefore, functions like - `std::string::size()` or `std::string::length()` return the number of - bytes in the string rather than the number of characters or glyphs. - - #### String comparison - - [RFC 7159](http://rfc7159.net/rfc7159) states: - > Software implementations are typically required to test names of object - > members for equality. Implementations that transform the textual - > representation into sequences of Unicode code units and then perform the - > comparison numerically, code unit by code unit, are interoperable in the - > sense that implementations will agree in all cases on equality or - > inequality of two strings. For example, implementations that compare - > strings with escaped characters unconverted may incorrectly find that - > `"a\\b"` and `"a\u005Cb"` are not equal. - - This implementation is interoperable as it does compare strings code unit - by code unit. - - #### Storage - - String values are stored as pointers in a @ref basic_json type. That is, - for any access to string values, a pointer of type `string_t*` must be - dereferenced. - - @since version 1.0.0 - */ - using string_t = StringType; - - /*! - @brief a type for a boolean - - [RFC 7159](http://rfc7159.net/rfc7159) implicitly describes a boolean as a - type which differentiates the two literals `true` and `false`. - - To store objects in C++, a type is defined by the template parameter @a - BooleanType which chooses the type to use. - - #### Default type - - With the default values for @a BooleanType (`bool`), the default value for - @a boolean_t is: - - @code {.cpp} - bool - @endcode - - #### Storage - - Boolean values are stored directly inside a @ref basic_json type. - - @since version 1.0.0 - */ - using boolean_t = BooleanType; - - /*! - @brief a type for a number (integer) - - [RFC 7159](http://rfc7159.net/rfc7159) describes numbers as follows: - > The representation of numbers is similar to that used in most - > programming languages. A number is represented in base 10 using decimal - > digits. It contains an integer component that may be prefixed with an - > optional minus sign, which may be followed by a fraction part and/or an - > exponent part. Leading zeros are not allowed. (...) Numeric values that - > cannot be represented in the grammar below (such as Infinity and NaN) - > are not permitted. - - This description includes both integer and floating-point numbers. - However, C++ allows more precise storage if it is known whether the number - is a signed integer, an unsigned integer or a floating-point number. - Therefore, three different types, @ref number_integer_t, @ref - number_unsigned_t and @ref number_float_t are used. - - To store integer numbers in C++, a type is defined by the template - parameter @a NumberIntegerType which chooses the type to use. - - #### Default type - - With the default values for @a NumberIntegerType (`int64_t`), the default - value for @a number_integer_t is: - - @code {.cpp} - int64_t - @endcode - - #### Default behavior - - - The restrictions about leading zeros is not enforced in C++. Instead, - leading zeros in integer literals lead to an interpretation as octal - number. Internally, the value will be stored as decimal number. For - instance, the C++ integer literal `010` will be serialized to `8`. - During deserialization, leading zeros yield an error. - - Not-a-number (NaN) values will be serialized to `null`. - - #### Limits - - [RFC 7159](http://rfc7159.net/rfc7159) specifies: - > An implementation may set limits on the range and precision of numbers. - - When the default type is used, the maximal integer number that can be - stored is `9223372036854775807` (INT64_MAX) and the minimal integer number - that can be stored is `-9223372036854775808` (INT64_MIN). Integer numbers - that are out of range will yield over/underflow when used in a - constructor. During deserialization, too large or small integer numbers - will be automatically be stored as @ref number_unsigned_t or @ref - number_float_t. - - [RFC 7159](http://rfc7159.net/rfc7159) further states: - > Note that when such software is used, numbers that are integers and are - > in the range \f$[-2^{53}+1, 2^{53}-1]\f$ are interoperable in the sense - > that implementations will agree exactly on their numeric values. - - As this range is a subrange of the exactly supported range [INT64_MIN, - INT64_MAX], this class's integer type is interoperable. - - #### Storage - - Integer number values are stored directly inside a @ref basic_json type. - - @sa @ref number_float_t -- type for number values (floating-point) - - @sa @ref number_unsigned_t -- type for number values (unsigned integer) - - @since version 1.0.0 - */ - using number_integer_t = NumberIntegerType; - - /*! - @brief a type for a number (unsigned) - - [RFC 7159](http://rfc7159.net/rfc7159) describes numbers as follows: - > The representation of numbers is similar to that used in most - > programming languages. A number is represented in base 10 using decimal - > digits. It contains an integer component that may be prefixed with an - > optional minus sign, which may be followed by a fraction part and/or an - > exponent part. Leading zeros are not allowed. (...) Numeric values that - > cannot be represented in the grammar below (such as Infinity and NaN) - > are not permitted. - - This description includes both integer and floating-point numbers. - However, C++ allows more precise storage if it is known whether the number - is a signed integer, an unsigned integer or a floating-point number. - Therefore, three different types, @ref number_integer_t, @ref - number_unsigned_t and @ref number_float_t are used. - - To store unsigned integer numbers in C++, a type is defined by the - template parameter @a NumberUnsignedType which chooses the type to use. - - #### Default type - - With the default values for @a NumberUnsignedType (`uint64_t`), the - default value for @a number_unsigned_t is: - - @code {.cpp} - uint64_t - @endcode - - #### Default behavior - - - The restrictions about leading zeros is not enforced in C++. Instead, - leading zeros in integer literals lead to an interpretation as octal - number. Internally, the value will be stored as decimal number. For - instance, the C++ integer literal `010` will be serialized to `8`. - During deserialization, leading zeros yield an error. - - Not-a-number (NaN) values will be serialized to `null`. - - #### Limits - - [RFC 7159](http://rfc7159.net/rfc7159) specifies: - > An implementation may set limits on the range and precision of numbers. - - When the default type is used, the maximal integer number that can be - stored is `18446744073709551615` (UINT64_MAX) and the minimal integer - number that can be stored is `0`. Integer numbers that are out of range - will yield over/underflow when used in a constructor. During - deserialization, too large or small integer numbers will be automatically - be stored as @ref number_integer_t or @ref number_float_t. - - [RFC 7159](http://rfc7159.net/rfc7159) further states: - > Note that when such software is used, numbers that are integers and are - > in the range \f$[-2^{53}+1, 2^{53}-1]\f$ are interoperable in the sense - > that implementations will agree exactly on their numeric values. - - As this range is a subrange (when considered in conjunction with the - number_integer_t type) of the exactly supported range [0, UINT64_MAX], - this class's integer type is interoperable. - - #### Storage - - Integer number values are stored directly inside a @ref basic_json type. - - @sa @ref number_float_t -- type for number values (floating-point) - @sa @ref number_integer_t -- type for number values (integer) - - @since version 2.0.0 - */ - using number_unsigned_t = NumberUnsignedType; - - /*! - @brief a type for a number (floating-point) - - [RFC 7159](http://rfc7159.net/rfc7159) describes numbers as follows: - > The representation of numbers is similar to that used in most - > programming languages. A number is represented in base 10 using decimal - > digits. It contains an integer component that may be prefixed with an - > optional minus sign, which may be followed by a fraction part and/or an - > exponent part. Leading zeros are not allowed. (...) Numeric values that - > cannot be represented in the grammar below (such as Infinity and NaN) - > are not permitted. - - This description includes both integer and floating-point numbers. - However, C++ allows more precise storage if it is known whether the number - is a signed integer, an unsigned integer or a floating-point number. - Therefore, three different types, @ref number_integer_t, @ref - number_unsigned_t and @ref number_float_t are used. - - To store floating-point numbers in C++, a type is defined by the template - parameter @a NumberFloatType which chooses the type to use. - - #### Default type - - With the default values for @a NumberFloatType (`double`), the default - value for @a number_float_t is: - - @code {.cpp} - double - @endcode - - #### Default behavior - - - The restrictions about leading zeros is not enforced in C++. Instead, - leading zeros in floating-point literals will be ignored. Internally, - the value will be stored as decimal number. For instance, the C++ - floating-point literal `01.2` will be serialized to `1.2`. During - deserialization, leading zeros yield an error. - - Not-a-number (NaN) values will be serialized to `null`. - - #### Limits - - [RFC 7159](http://rfc7159.net/rfc7159) states: - > This specification allows implementations to set limits on the range and - > precision of numbers accepted. Since software that implements IEEE - > 754-2008 binary64 (double precision) numbers is generally available and - > widely used, good interoperability can be achieved by implementations - > that expect no more precision or range than these provide, in the sense - > that implementations will approximate JSON numbers within the expected - > precision. - - This implementation does exactly follow this approach, as it uses double - precision floating-point numbers. Note values smaller than - `-1.79769313486232e+308` and values greater than `1.79769313486232e+308` - will be stored as NaN internally and be serialized to `null`. - - #### Storage - - Floating-point number values are stored directly inside a @ref basic_json - type. - - @sa @ref number_integer_t -- type for number values (integer) - - @sa @ref number_unsigned_t -- type for number values (unsigned integer) - - @since version 1.0.0 - */ - using number_float_t = NumberFloatType; - - /*! - @brief a type for a packed binary type - - This type is a type designed to carry binary data that appears in various - serialized formats, such as CBOR's Major Type 2, MessagePack's bin, and - BSON's generic binary subtype. This type is NOT a part of standard JSON and - exists solely for compatibility with these binary types. As such, it is - simply defined as an ordered sequence of zero or more byte values. - - Additionally, as an implementation detail, the subtype of the binary data is - carried around as a `std::uint8_t`, which is compatible with both of the - binary data formats that use binary subtyping, (though the specific - numbering is incompatible with each other, and it is up to the user to - translate between them). - - [CBOR's RFC 7049](https://tools.ietf.org/html/rfc7049) describes this type - as: - > Major type 2: a byte string. The string's length in bytes is represented - > following the rules for positive integers (major type 0). - - [MessagePack's documentation on the bin type - family](https://github.com/msgpack/msgpack/blob/master/spec.md#bin-format-family) - describes this type as: - > Bin format family stores an byte array in 2, 3, or 5 bytes of extra bytes - > in addition to the size of the byte array. - - [BSON's specifications](http://bsonspec.org/spec.html) describe several - binary types; however, this type is intended to represent the generic binary - type which has the description: - > Generic binary subtype - This is the most commonly used binary subtype and - > should be the 'default' for drivers and tools. - - None of these impose any limitations on the internal representation other - than the basic unit of storage be some type of array whose parts are - decomposable into bytes. - - The default representation of this binary format is a - `std::vector`, which is a very common way to represent a byte - array in modern C++. - - #### Default type - - The default values for @a BinaryType is `std::vector` - - #### Storage - - Binary Arrays are stored as pointers in a @ref basic_json type. That is, - for any access to array values, a pointer of the type `binary_t*` must be - dereferenced. - - #### Notes on subtypes - - - CBOR - - Binary values are represented as byte strings. No subtypes are - supported and will be ignored when CBOR is written. - - MessagePack - - If a subtype is given and the binary array contains exactly 1, 2, 4, 8, - or 16 elements, the fixext family (fixext1, fixext2, fixext4, fixext8) - is used. For other sizes, the ext family (ext8, ext16, ext32) is used. - The subtype is then added as singed 8-bit integer. - - If no subtype is given, the bin family (bin8, bin16, bin32) is used. - - BSON - - If a subtype is given, it is used and added as unsigned 8-bit integer. - - If no subtype is given, the generic binary subtype 0x00 is used. - - @sa @ref binary -- create a binary array - - @since version 3.8.0 - */ - using binary_t = nlohmann::byte_container_with_subtype; - /// @} - - private: - - /// helper for exception-safe object creation - template - JSON_HEDLEY_RETURNS_NON_NULL - static T* create(Args&& ... args) - { - AllocatorType alloc; - using AllocatorTraits = std::allocator_traits>; - - auto deleter = [&](T * object) - { - AllocatorTraits::deallocate(alloc, object, 1); - }; - std::unique_ptr object(AllocatorTraits::allocate(alloc, 1), deleter); - AllocatorTraits::construct(alloc, object.get(), std::forward(args)...); - JSON_ASSERT(object != nullptr); - return object.release(); - } - - //////////////////////// - // JSON value storage // - //////////////////////// - - /*! - @brief a JSON value - - The actual storage for a JSON value of the @ref basic_json class. This - union combines the different storage types for the JSON value types - defined in @ref value_t. - - JSON type | value_t type | used type - --------- | --------------- | ------------------------ - object | object | pointer to @ref object_t - array | array | pointer to @ref array_t - string | string | pointer to @ref string_t - boolean | boolean | @ref boolean_t - number | number_integer | @ref number_integer_t - number | number_unsigned | @ref number_unsigned_t - number | number_float | @ref number_float_t - binary | binary | pointer to @ref binary_t - null | null | *no value is stored* - - @note Variable-length types (objects, arrays, and strings) are stored as - pointers. The size of the union should not exceed 64 bits if the default - value types are used. - - @since version 1.0.0 - */ - union json_value - { - /// object (stored with pointer to save storage) - object_t* object; - /// array (stored with pointer to save storage) - array_t* array; - /// string (stored with pointer to save storage) - string_t* string; - /// binary (stored with pointer to save storage) - binary_t* binary; - /// boolean - boolean_t boolean; - /// number (integer) - number_integer_t number_integer; - /// number (unsigned integer) - number_unsigned_t number_unsigned; - /// number (floating-point) - number_float_t number_float; - - /// default constructor (for null values) - json_value() = default; - /// constructor for booleans - json_value(boolean_t v) noexcept : boolean(v) {} - /// constructor for numbers (integer) - json_value(number_integer_t v) noexcept : number_integer(v) {} - /// constructor for numbers (unsigned) - json_value(number_unsigned_t v) noexcept : number_unsigned(v) {} - /// constructor for numbers (floating-point) - json_value(number_float_t v) noexcept : number_float(v) {} - /// constructor for empty values of a given type - json_value(value_t t) - { - switch (t) - { - case value_t::object: - { - object = create(); - break; - } - - case value_t::array: - { - array = create(); - break; - } - - case value_t::string: - { - string = create(""); - break; - } - - case value_t::binary: - { - binary = create(); - break; - } - - case value_t::boolean: - { - boolean = boolean_t(false); - break; - } - - case value_t::number_integer: - { - number_integer = number_integer_t(0); - break; - } - - case value_t::number_unsigned: - { - number_unsigned = number_unsigned_t(0); - break; - } - - case value_t::number_float: - { - number_float = number_float_t(0.0); - break; - } - - case value_t::null: - { - object = nullptr; // silence warning, see #821 - break; - } - - default: - { - object = nullptr; // silence warning, see #821 - if (JSON_HEDLEY_UNLIKELY(t == value_t::null)) - { - JSON_THROW(other_error::create(500, "961c151d2e87f2686a955a9be24d316f1362bf21 3.9.1")); // LCOV_EXCL_LINE - } - break; - } - } - } - - /// constructor for strings - json_value(const string_t& value) - { - string = create(value); - } - - /// constructor for rvalue strings - json_value(string_t&& value) - { - string = create(std::move(value)); - } - - /// constructor for objects - json_value(const object_t& value) - { - object = create(value); - } - - /// constructor for rvalue objects - json_value(object_t&& value) - { - object = create(std::move(value)); - } - - /// constructor for arrays - json_value(const array_t& value) - { - array = create(value); - } - - /// constructor for rvalue arrays - json_value(array_t&& value) - { - array = create(std::move(value)); - } - - /// constructor for binary arrays - json_value(const typename binary_t::container_type& value) - { - binary = create(value); - } - - /// constructor for rvalue binary arrays - json_value(typename binary_t::container_type&& value) - { - binary = create(std::move(value)); - } - - /// constructor for binary arrays (internal type) - json_value(const binary_t& value) - { - binary = create(value); - } - - /// constructor for rvalue binary arrays (internal type) - json_value(binary_t&& value) - { - binary = create(std::move(value)); - } - - void destroy(value_t t) noexcept - { - // flatten the current json_value to a heap-allocated stack - std::vector stack; - - // move the top-level items to stack - if (t == value_t::array) - { - stack.reserve(array->size()); - std::move(array->begin(), array->end(), std::back_inserter(stack)); - } - else if (t == value_t::object) - { - stack.reserve(object->size()); - for (auto&& it : *object) - { - stack.push_back(std::move(it.second)); - } - } - - while (!stack.empty()) - { - // move the last item to local variable to be processed - basic_json current_item(std::move(stack.back())); - stack.pop_back(); - - // if current_item is array/object, move - // its children to the stack to be processed later - if (current_item.is_array()) - { - std::move(current_item.m_value.array->begin(), current_item.m_value.array->end(), - std::back_inserter(stack)); - - current_item.m_value.array->clear(); - } - else if (current_item.is_object()) - { - for (auto&& it : *current_item.m_value.object) - { - stack.push_back(std::move(it.second)); - } - - current_item.m_value.object->clear(); - } - - // it's now safe that current_item get destructed - // since it doesn't have any children - } - - switch (t) - { - case value_t::object: - { - AllocatorType alloc; - std::allocator_traits::destroy(alloc, object); - std::allocator_traits::deallocate(alloc, object, 1); - break; - } - - case value_t::array: - { - AllocatorType alloc; - std::allocator_traits::destroy(alloc, array); - std::allocator_traits::deallocate(alloc, array, 1); - break; - } - - case value_t::string: - { - AllocatorType alloc; - std::allocator_traits::destroy(alloc, string); - std::allocator_traits::deallocate(alloc, string, 1); - break; - } - - case value_t::binary: - { - AllocatorType alloc; - std::allocator_traits::destroy(alloc, binary); - std::allocator_traits::deallocate(alloc, binary, 1); - break; - } - - default: - { - break; - } - } - } - }; - - /*! - @brief checks the class invariants - - This function asserts the class invariants. It needs to be called at the - end of every constructor to make sure that created objects respect the - invariant. Furthermore, it has to be called each time the type of a JSON - value is changed, because the invariant expresses a relationship between - @a m_type and @a m_value. - */ - void assert_invariant() const noexcept - { - JSON_ASSERT(m_type != value_t::object || m_value.object != nullptr); - JSON_ASSERT(m_type != value_t::array || m_value.array != nullptr); - JSON_ASSERT(m_type != value_t::string || m_value.string != nullptr); - JSON_ASSERT(m_type != value_t::binary || m_value.binary != nullptr); - } - - public: - ////////////////////////// - // JSON parser callback // - ////////////////////////// - - /*! - @brief parser event types - - The parser callback distinguishes the following events: - - `object_start`: the parser read `{` and started to process a JSON object - - `key`: the parser read a key of a value in an object - - `object_end`: the parser read `}` and finished processing a JSON object - - `array_start`: the parser read `[` and started to process a JSON array - - `array_end`: the parser read `]` and finished processing a JSON array - - `value`: the parser finished reading a JSON value - - @image html callback_events.png "Example when certain parse events are triggered" - - @sa @ref parser_callback_t for more information and examples - */ - using parse_event_t = detail::parse_event_t; - - /*! - @brief per-element parser callback type - - With a parser callback function, the result of parsing a JSON text can be - influenced. When passed to @ref parse, it is called on certain events - (passed as @ref parse_event_t via parameter @a event) with a set recursion - depth @a depth and context JSON value @a parsed. The return value of the - callback function is a boolean indicating whether the element that emitted - the callback shall be kept or not. - - We distinguish six scenarios (determined by the event type) in which the - callback function can be called. The following table describes the values - of the parameters @a depth, @a event, and @a parsed. - - parameter @a event | description | parameter @a depth | parameter @a parsed - ------------------ | ----------- | ------------------ | ------------------- - parse_event_t::object_start | the parser read `{` and started to process a JSON object | depth of the parent of the JSON object | a JSON value with type discarded - parse_event_t::key | the parser read a key of a value in an object | depth of the currently parsed JSON object | a JSON string containing the key - parse_event_t::object_end | the parser read `}` and finished processing a JSON object | depth of the parent of the JSON object | the parsed JSON object - parse_event_t::array_start | the parser read `[` and started to process a JSON array | depth of the parent of the JSON array | a JSON value with type discarded - parse_event_t::array_end | the parser read `]` and finished processing a JSON array | depth of the parent of the JSON array | the parsed JSON array - parse_event_t::value | the parser finished reading a JSON value | depth of the value | the parsed JSON value - - @image html callback_events.png "Example when certain parse events are triggered" - - Discarding a value (i.e., returning `false`) has different effects - depending on the context in which function was called: - - - Discarded values in structured types are skipped. That is, the parser - will behave as if the discarded value was never read. - - In case a value outside a structured type is skipped, it is replaced - with `null`. This case happens if the top-level element is skipped. - - @param[in] depth the depth of the recursion during parsing - - @param[in] event an event of type parse_event_t indicating the context in - the callback function has been called - - @param[in,out] parsed the current intermediate parse result; note that - writing to this value has no effect for parse_event_t::key events - - @return Whether the JSON value which called the function during parsing - should be kept (`true`) or not (`false`). In the latter case, it is either - skipped completely or replaced by an empty discarded object. - - @sa @ref parse for examples - - @since version 1.0.0 - */ - using parser_callback_t = detail::parser_callback_t; - - ////////////////// - // constructors // - ////////////////// - - /// @name constructors and destructors - /// Constructors of class @ref basic_json, copy/move constructor, copy - /// assignment, static functions creating objects, and the destructor. - /// @{ - - /*! - @brief create an empty value with a given type - - Create an empty JSON value with a given type. The value will be default - initialized with an empty value which depends on the type: - - Value type | initial value - ----------- | ------------- - null | `null` - boolean | `false` - string | `""` - number | `0` - object | `{}` - array | `[]` - binary | empty array - - @param[in] v the type of the value to create - - @complexity Constant. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. - - @liveexample{The following code shows the constructor for different @ref - value_t values,basic_json__value_t} - - @sa @ref clear() -- restores the postcondition of this constructor - - @since version 1.0.0 - */ - basic_json(const value_t v) - : m_type(v), m_value(v) - { - assert_invariant(); - } - - /*! - @brief create a null object - - Create a `null` JSON value. It either takes a null pointer as parameter - (explicitly creating `null`) or no parameter (implicitly creating `null`). - The passed null pointer itself is not read -- it is only used to choose - the right constructor. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this constructor never throws - exceptions. - - @liveexample{The following code shows the constructor with and without a - null pointer parameter.,basic_json__nullptr_t} - - @since version 1.0.0 - */ - basic_json(std::nullptr_t = nullptr) noexcept - : basic_json(value_t::null) - { - assert_invariant(); - } - - /*! - @brief create a JSON value - - This is a "catch all" constructor for all compatible JSON types; that is, - types for which a `to_json()` method exists. The constructor forwards the - parameter @a val to that method (to `json_serializer::to_json` method - with `U = uncvref_t`, to be exact). - - Template type @a CompatibleType includes, but is not limited to, the - following types: - - **arrays**: @ref array_t and all kinds of compatible containers such as - `std::vector`, `std::deque`, `std::list`, `std::forward_list`, - `std::array`, `std::valarray`, `std::set`, `std::unordered_set`, - `std::multiset`, and `std::unordered_multiset` with a `value_type` from - which a @ref basic_json value can be constructed. - - **objects**: @ref object_t and all kinds of compatible associative - containers such as `std::map`, `std::unordered_map`, `std::multimap`, - and `std::unordered_multimap` with a `key_type` compatible to - @ref string_t and a `value_type` from which a @ref basic_json value can - be constructed. - - **strings**: @ref string_t, string literals, and all compatible string - containers can be used. - - **numbers**: @ref number_integer_t, @ref number_unsigned_t, - @ref number_float_t, and all convertible number types such as `int`, - `size_t`, `int64_t`, `float` or `double` can be used. - - **boolean**: @ref boolean_t / `bool` can be used. - - **binary**: @ref binary_t / `std::vector` may be used, - unfortunately because string literals cannot be distinguished from binary - character arrays by the C++ type system, all types compatible with `const - char*` will be directed to the string constructor instead. This is both - for backwards compatibility, and due to the fact that a binary type is not - a standard JSON type. - - See the examples below. - - @tparam CompatibleType a type such that: - - @a CompatibleType is not derived from `std::istream`, - - @a CompatibleType is not @ref basic_json (to avoid hijacking copy/move - constructors), - - @a CompatibleType is not a different @ref basic_json type (i.e. with different template arguments) - - @a CompatibleType is not a @ref basic_json nested type (e.g., - @ref json_pointer, @ref iterator, etc ...) - - @ref @ref json_serializer has a - `to_json(basic_json_t&, CompatibleType&&)` method - - @tparam U = `uncvref_t` - - @param[in] val the value to be forwarded to the respective constructor - - @complexity Usually linear in the size of the passed @a val, also - depending on the implementation of the called `to_json()` - method. - - @exceptionsafety Depends on the called constructor. For types directly - supported by the library (i.e., all types for which no `to_json()` function - was provided), strong guarantee holds: if an exception is thrown, there are - no changes to any JSON value. - - @liveexample{The following code shows the constructor with several - compatible types.,basic_json__CompatibleType} - - @since version 2.1.0 - */ - template < typename CompatibleType, - typename U = detail::uncvref_t, - detail::enable_if_t < - !detail::is_basic_json::value && detail::is_compatible_type::value, int > = 0 > - basic_json(CompatibleType && val) noexcept(noexcept( - JSONSerializer::to_json(std::declval(), - std::forward(val)))) - { - JSONSerializer::to_json(*this, std::forward(val)); - assert_invariant(); - } - - /*! - @brief create a JSON value from an existing one - - This is a constructor for existing @ref basic_json types. - It does not hijack copy/move constructors, since the parameter has different - template arguments than the current ones. - - The constructor tries to convert the internal @ref m_value of the parameter. - - @tparam BasicJsonType a type such that: - - @a BasicJsonType is a @ref basic_json type. - - @a BasicJsonType has different template arguments than @ref basic_json_t. - - @param[in] val the @ref basic_json value to be converted. - - @complexity Usually linear in the size of the passed @a val, also - depending on the implementation of the called `to_json()` - method. - - @exceptionsafety Depends on the called constructor. For types directly - supported by the library (i.e., all types for which no `to_json()` function - was provided), strong guarantee holds: if an exception is thrown, there are - no changes to any JSON value. - - @since version 3.2.0 - */ - template < typename BasicJsonType, - detail::enable_if_t < - detail::is_basic_json::value&& !std::is_same::value, int > = 0 > - basic_json(const BasicJsonType& val) - { - using other_boolean_t = typename BasicJsonType::boolean_t; - using other_number_float_t = typename BasicJsonType::number_float_t; - using other_number_integer_t = typename BasicJsonType::number_integer_t; - using other_number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using other_string_t = typename BasicJsonType::string_t; - using other_object_t = typename BasicJsonType::object_t; - using other_array_t = typename BasicJsonType::array_t; - using other_binary_t = typename BasicJsonType::binary_t; - - switch (val.type()) - { - case value_t::boolean: - JSONSerializer::to_json(*this, val.template get()); - break; - case value_t::number_float: - JSONSerializer::to_json(*this, val.template get()); - break; - case value_t::number_integer: - JSONSerializer::to_json(*this, val.template get()); - break; - case value_t::number_unsigned: - JSONSerializer::to_json(*this, val.template get()); - break; - case value_t::string: - JSONSerializer::to_json(*this, val.template get_ref()); - break; - case value_t::object: - JSONSerializer::to_json(*this, val.template get_ref()); - break; - case value_t::array: - JSONSerializer::to_json(*this, val.template get_ref()); - break; - case value_t::binary: - JSONSerializer::to_json(*this, val.template get_ref()); - break; - case value_t::null: - *this = nullptr; - break; - case value_t::discarded: - m_type = value_t::discarded; - break; - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // LCOV_EXCL_LINE - } - assert_invariant(); - } - - /*! - @brief create a container (array or object) from an initializer list - - Creates a JSON value of type array or object from the passed initializer - list @a init. In case @a type_deduction is `true` (default), the type of - the JSON value to be created is deducted from the initializer list @a init - according to the following rules: - - 1. If the list is empty, an empty JSON object value `{}` is created. - 2. If the list consists of pairs whose first element is a string, a JSON - object value is created where the first elements of the pairs are - treated as keys and the second elements are as values. - 3. In all other cases, an array is created. - - The rules aim to create the best fit between a C++ initializer list and - JSON values. The rationale is as follows: - - 1. The empty initializer list is written as `{}` which is exactly an empty - JSON object. - 2. C++ has no way of describing mapped types other than to list a list of - pairs. As JSON requires that keys must be of type string, rule 2 is the - weakest constraint one can pose on initializer lists to interpret them - as an object. - 3. In all other cases, the initializer list could not be interpreted as - JSON object type, so interpreting it as JSON array type is safe. - - With the rules described above, the following JSON values cannot be - expressed by an initializer list: - - - the empty array (`[]`): use @ref array(initializer_list_t) - with an empty initializer list in this case - - arrays whose elements satisfy rule 2: use @ref - array(initializer_list_t) with the same initializer list - in this case - - @note When used without parentheses around an empty initializer list, @ref - basic_json() is called instead of this function, yielding the JSON null - value. - - @param[in] init initializer list with JSON values - - @param[in] type_deduction internal parameter; when set to `true`, the type - of the JSON value is deducted from the initializer list @a init; when set - to `false`, the type provided via @a manual_type is forced. This mode is - used by the functions @ref array(initializer_list_t) and - @ref object(initializer_list_t). - - @param[in] manual_type internal parameter; when @a type_deduction is set - to `false`, the created JSON value will use the provided type (only @ref - value_t::array and @ref value_t::object are valid); when @a type_deduction - is set to `true`, this parameter has no effect - - @throw type_error.301 if @a type_deduction is `false`, @a manual_type is - `value_t::object`, but @a init contains an element which is not a pair - whose first element is a string. In this case, the constructor could not - create an object. If @a type_deduction would have be `true`, an array - would have been created. See @ref object(initializer_list_t) - for an example. - - @complexity Linear in the size of the initializer list @a init. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. - - @liveexample{The example below shows how JSON values are created from - initializer lists.,basic_json__list_init_t} - - @sa @ref array(initializer_list_t) -- create a JSON array - value from an initializer list - @sa @ref object(initializer_list_t) -- create a JSON object - value from an initializer list - - @since version 1.0.0 - */ - basic_json(initializer_list_t init, - bool type_deduction = true, - value_t manual_type = value_t::array) - { - // check if each element is an array with two elements whose first - // element is a string - bool is_an_object = std::all_of(init.begin(), init.end(), - [](const detail::json_ref& element_ref) - { - return element_ref->is_array() && element_ref->size() == 2 && (*element_ref)[0].is_string(); - }); - - // adjust type if type deduction is not wanted - if (!type_deduction) - { - // if array is wanted, do not create an object though possible - if (manual_type == value_t::array) - { - is_an_object = false; - } - - // if object is wanted but impossible, throw an exception - if (JSON_HEDLEY_UNLIKELY(manual_type == value_t::object && !is_an_object)) - { - JSON_THROW(type_error::create(301, "cannot create object from initializer list")); - } - } - - if (is_an_object) - { - // the initializer list is a list of pairs -> create object - m_type = value_t::object; - m_value = value_t::object; - - std::for_each(init.begin(), init.end(), [this](const detail::json_ref& element_ref) - { - auto element = element_ref.moved_or_copied(); - m_value.object->emplace( - std::move(*((*element.m_value.array)[0].m_value.string)), - std::move((*element.m_value.array)[1])); - }); - } - else - { - // the initializer list describes an array -> create array - m_type = value_t::array; - m_value.array = create(init.begin(), init.end()); - } - - assert_invariant(); - } - - /*! - @brief explicitly create a binary array (without subtype) - - Creates a JSON binary array value from a given binary container. Binary - values are part of various binary formats, such as CBOR, MessagePack, and - BSON. This constructor is used to create a value for serialization to those - formats. - - @note Note, this function exists because of the difficulty in correctly - specifying the correct template overload in the standard value ctor, as both - JSON arrays and JSON binary arrays are backed with some form of a - `std::vector`. Because JSON binary arrays are a non-standard extension it - was decided that it would be best to prevent automatic initialization of a - binary array type, for backwards compatibility and so it does not happen on - accident. - - @param[in] init container containing bytes to use as binary type - - @return JSON binary array value - - @complexity Linear in the size of @a init. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. - - @since version 3.8.0 - */ - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json binary(const typename binary_t::container_type& init) - { - auto res = basic_json(); - res.m_type = value_t::binary; - res.m_value = init; - return res; - } - - /*! - @brief explicitly create a binary array (with subtype) - - Creates a JSON binary array value from a given binary container. Binary - values are part of various binary formats, such as CBOR, MessagePack, and - BSON. This constructor is used to create a value for serialization to those - formats. - - @note Note, this function exists because of the difficulty in correctly - specifying the correct template overload in the standard value ctor, as both - JSON arrays and JSON binary arrays are backed with some form of a - `std::vector`. Because JSON binary arrays are a non-standard extension it - was decided that it would be best to prevent automatic initialization of a - binary array type, for backwards compatibility and so it does not happen on - accident. - - @param[in] init container containing bytes to use as binary type - @param[in] subtype subtype to use in MessagePack and BSON - - @return JSON binary array value - - @complexity Linear in the size of @a init. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. - - @since version 3.8.0 - */ - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json binary(const typename binary_t::container_type& init, std::uint8_t subtype) - { - auto res = basic_json(); - res.m_type = value_t::binary; - res.m_value = binary_t(init, subtype); - return res; - } - - /// @copydoc binary(const typename binary_t::container_type&) - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json binary(typename binary_t::container_type&& init) - { - auto res = basic_json(); - res.m_type = value_t::binary; - res.m_value = std::move(init); - return res; - } - - /// @copydoc binary(const typename binary_t::container_type&, std::uint8_t) - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json binary(typename binary_t::container_type&& init, std::uint8_t subtype) - { - auto res = basic_json(); - res.m_type = value_t::binary; - res.m_value = binary_t(std::move(init), subtype); - return res; - } - - /*! - @brief explicitly create an array from an initializer list - - Creates a JSON array value from a given initializer list. That is, given a - list of values `a, b, c`, creates the JSON value `[a, b, c]`. If the - initializer list is empty, the empty array `[]` is created. - - @note This function is only needed to express two edge cases that cannot - be realized with the initializer list constructor (@ref - basic_json(initializer_list_t, bool, value_t)). These cases - are: - 1. creating an array whose elements are all pairs whose first element is a - string -- in this case, the initializer list constructor would create an - object, taking the first elements as keys - 2. creating an empty array -- passing the empty initializer list to the - initializer list constructor yields an empty object - - @param[in] init initializer list with JSON values to create an array from - (optional) - - @return JSON array value - - @complexity Linear in the size of @a init. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. - - @liveexample{The following code shows an example for the `array` - function.,array} - - @sa @ref basic_json(initializer_list_t, bool, value_t) -- - create a JSON value from an initializer list - @sa @ref object(initializer_list_t) -- create a JSON object - value from an initializer list - - @since version 1.0.0 - */ - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json array(initializer_list_t init = {}) - { - return basic_json(init, false, value_t::array); - } - - /*! - @brief explicitly create an object from an initializer list - - Creates a JSON object value from a given initializer list. The initializer - lists elements must be pairs, and their first elements must be strings. If - the initializer list is empty, the empty object `{}` is created. - - @note This function is only added for symmetry reasons. In contrast to the - related function @ref array(initializer_list_t), there are - no cases which can only be expressed by this function. That is, any - initializer list @a init can also be passed to the initializer list - constructor @ref basic_json(initializer_list_t, bool, value_t). - - @param[in] init initializer list to create an object from (optional) - - @return JSON object value - - @throw type_error.301 if @a init is not a list of pairs whose first - elements are strings. In this case, no object can be created. When such a - value is passed to @ref basic_json(initializer_list_t, bool, value_t), - an array would have been created from the passed initializer list @a init. - See example below. - - @complexity Linear in the size of @a init. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. - - @liveexample{The following code shows an example for the `object` - function.,object} - - @sa @ref basic_json(initializer_list_t, bool, value_t) -- - create a JSON value from an initializer list - @sa @ref array(initializer_list_t) -- create a JSON array - value from an initializer list - - @since version 1.0.0 - */ - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json object(initializer_list_t init = {}) - { - return basic_json(init, false, value_t::object); - } - - /*! - @brief construct an array with count copies of given value - - Constructs a JSON array value by creating @a cnt copies of a passed value. - In case @a cnt is `0`, an empty array is created. - - @param[in] cnt the number of JSON copies of @a val to create - @param[in] val the JSON value to copy - - @post `std::distance(begin(),end()) == cnt` holds. - - @complexity Linear in @a cnt. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. - - @liveexample{The following code shows examples for the @ref - basic_json(size_type\, const basic_json&) - constructor.,basic_json__size_type_basic_json} - - @since version 1.0.0 - */ - basic_json(size_type cnt, const basic_json& val) - : m_type(value_t::array) - { - m_value.array = create(cnt, val); - assert_invariant(); - } - - /*! - @brief construct a JSON container given an iterator range - - Constructs the JSON value with the contents of the range `[first, last)`. - The semantics depends on the different types a JSON value can have: - - In case of a null type, invalid_iterator.206 is thrown. - - In case of other primitive types (number, boolean, or string), @a first - must be `begin()` and @a last must be `end()`. In this case, the value is - copied. Otherwise, invalid_iterator.204 is thrown. - - In case of structured types (array, object), the constructor behaves as - similar versions for `std::vector` or `std::map`; that is, a JSON array - or object is constructed from the values in the range. - - @tparam InputIT an input iterator type (@ref iterator or @ref - const_iterator) - - @param[in] first begin of the range to copy from (included) - @param[in] last end of the range to copy from (excluded) - - @pre Iterators @a first and @a last must be initialized. **This - precondition is enforced with an assertion (see warning).** If - assertions are switched off, a violation of this precondition yields - undefined behavior. - - @pre Range `[first, last)` is valid. Usually, this precondition cannot be - checked efficiently. Only certain edge cases are detected; see the - description of the exceptions below. A violation of this precondition - yields undefined behavior. - - @warning A precondition is enforced with a runtime assertion that will - result in calling `std::abort` if this precondition is not met. - Assertions can be disabled by defining `NDEBUG` at compile time. - See https://en.cppreference.com/w/cpp/error/assert for more - information. - - @throw invalid_iterator.201 if iterators @a first and @a last are not - compatible (i.e., do not belong to the same JSON value). In this case, - the range `[first, last)` is undefined. - @throw invalid_iterator.204 if iterators @a first and @a last belong to a - primitive type (number, boolean, or string), but @a first does not point - to the first element any more. In this case, the range `[first, last)` is - undefined. See example code below. - @throw invalid_iterator.206 if iterators @a first and @a last belong to a - null value. In this case, the range `[first, last)` is undefined. - - @complexity Linear in distance between @a first and @a last. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. - - @liveexample{The example below shows several ways to create JSON values by - specifying a subrange with iterators.,basic_json__InputIt_InputIt} - - @since version 1.0.0 - */ - template < class InputIT, typename std::enable_if < - std::is_same::value || - std::is_same::value, int >::type = 0 > - basic_json(InputIT first, InputIT last) - { - JSON_ASSERT(first.m_object != nullptr); - JSON_ASSERT(last.m_object != nullptr); - - // make sure iterator fits the current value - if (JSON_HEDLEY_UNLIKELY(first.m_object != last.m_object)) - { - JSON_THROW(invalid_iterator::create(201, "iterators are not compatible")); - } - - // copy type from first iterator - m_type = first.m_object->m_type; - - // check if iterator range is complete for primitive values - switch (m_type) - { - case value_t::boolean: - case value_t::number_float: - case value_t::number_integer: - case value_t::number_unsigned: - case value_t::string: - { - if (JSON_HEDLEY_UNLIKELY(!first.m_it.primitive_iterator.is_begin() - || !last.m_it.primitive_iterator.is_end())) - { - JSON_THROW(invalid_iterator::create(204, "iterators out of range")); - } - break; - } - - default: - break; - } - - switch (m_type) - { - case value_t::number_integer: - { - m_value.number_integer = first.m_object->m_value.number_integer; - break; - } - - case value_t::number_unsigned: - { - m_value.number_unsigned = first.m_object->m_value.number_unsigned; - break; - } - - case value_t::number_float: - { - m_value.number_float = first.m_object->m_value.number_float; - break; - } - - case value_t::boolean: - { - m_value.boolean = first.m_object->m_value.boolean; - break; - } - - case value_t::string: - { - m_value = *first.m_object->m_value.string; - break; - } - - case value_t::object: - { - m_value.object = create(first.m_it.object_iterator, - last.m_it.object_iterator); - break; - } - - case value_t::array: - { - m_value.array = create(first.m_it.array_iterator, - last.m_it.array_iterator); - break; - } - - case value_t::binary: - { - m_value = *first.m_object->m_value.binary; - break; - } - - default: - JSON_THROW(invalid_iterator::create(206, "cannot construct with iterators from " + - std::string(first.m_object->type_name()))); - } - - assert_invariant(); - } - - - /////////////////////////////////////// - // other constructors and destructor // - /////////////////////////////////////// - - template, - std::is_same>::value, int> = 0 > - basic_json(const JsonRef& ref) : basic_json(ref.moved_or_copied()) {} - - /*! - @brief copy constructor - - Creates a copy of a given JSON value. - - @param[in] other the JSON value to copy - - @post `*this == other` - - @complexity Linear in the size of @a other. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is linear. - - As postcondition, it holds: `other == basic_json(other)`. - - @liveexample{The following code shows an example for the copy - constructor.,basic_json__basic_json} - - @since version 1.0.0 - */ - basic_json(const basic_json& other) - : m_type(other.m_type) - { - // check of passed value is valid - other.assert_invariant(); - - switch (m_type) - { - case value_t::object: - { - m_value = *other.m_value.object; - break; - } - - case value_t::array: - { - m_value = *other.m_value.array; - break; - } - - case value_t::string: - { - m_value = *other.m_value.string; - break; - } - - case value_t::boolean: - { - m_value = other.m_value.boolean; - break; - } - - case value_t::number_integer: - { - m_value = other.m_value.number_integer; - break; - } - - case value_t::number_unsigned: - { - m_value = other.m_value.number_unsigned; - break; - } - - case value_t::number_float: - { - m_value = other.m_value.number_float; - break; - } - - case value_t::binary: - { - m_value = *other.m_value.binary; - break; - } - - default: - break; - } - - assert_invariant(); - } - - /*! - @brief move constructor - - Move constructor. Constructs a JSON value with the contents of the given - value @a other using move semantics. It "steals" the resources from @a - other and leaves it as JSON null value. - - @param[in,out] other value to move to this object - - @post `*this` has the same value as @a other before the call. - @post @a other is a JSON null value. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this constructor never throws - exceptions. - - @requirement This function helps `basic_json` satisfying the - [MoveConstructible](https://en.cppreference.com/w/cpp/named_req/MoveConstructible) - requirements. - - @liveexample{The code below shows the move constructor explicitly called - via std::move.,basic_json__moveconstructor} - - @since version 1.0.0 - */ - basic_json(basic_json&& other) noexcept - : m_type(std::move(other.m_type)), - m_value(std::move(other.m_value)) - { - // check that passed value is valid - other.assert_invariant(); - - // invalidate payload - other.m_type = value_t::null; - other.m_value = {}; - - assert_invariant(); - } - - /*! - @brief copy assignment - - Copy assignment operator. Copies a JSON value via the "copy and swap" - strategy: It is expressed in terms of the copy constructor, destructor, - and the `swap()` member function. - - @param[in] other value to copy from - - @complexity Linear. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is linear. - - @liveexample{The code below shows and example for the copy assignment. It - creates a copy of value `a` which is then swapped with `b`. Finally\, the - copy of `a` (which is the null value after the swap) is - destroyed.,basic_json__copyassignment} - - @since version 1.0.0 - */ - basic_json& operator=(basic_json other) noexcept ( - std::is_nothrow_move_constructible::value&& - std::is_nothrow_move_assignable::value&& - std::is_nothrow_move_constructible::value&& - std::is_nothrow_move_assignable::value - ) - { - // check that passed value is valid - other.assert_invariant(); - - using std::swap; - swap(m_type, other.m_type); - swap(m_value, other.m_value); - - assert_invariant(); - return *this; - } - - /*! - @brief destructor - - Destroys the JSON value and frees all allocated memory. - - @complexity Linear. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is linear. - - All stored elements are destroyed and all memory is freed. - - @since version 1.0.0 - */ - ~basic_json() noexcept - { - assert_invariant(); - m_value.destroy(m_type); - } - - /// @} - - public: - /////////////////////// - // object inspection // - /////////////////////// - - /// @name object inspection - /// Functions to inspect the type of a JSON value. - /// @{ - - /*! - @brief serialization - - Serialization function for JSON values. The function tries to mimic - Python's `json.dumps()` function, and currently supports its @a indent - and @a ensure_ascii parameters. - - @param[in] indent If indent is nonnegative, then array elements and object - members will be pretty-printed with that indent level. An indent level of - `0` will only insert newlines. `-1` (the default) selects the most compact - representation. - @param[in] indent_char The character to use for indentation if @a indent is - greater than `0`. The default is ` ` (space). - @param[in] ensure_ascii If @a ensure_ascii is true, all non-ASCII characters - in the output are escaped with `\uXXXX` sequences, and the result consists - of ASCII characters only. - @param[in] error_handler how to react on decoding errors; there are three - possible values: `strict` (throws and exception in case a decoding error - occurs; default), `replace` (replace invalid UTF-8 sequences with U+FFFD), - and `ignore` (ignore invalid UTF-8 sequences during serialization; all - bytes are copied to the output unchanged). - - @return string containing the serialization of the JSON value - - @throw type_error.316 if a string stored inside the JSON value is not - UTF-8 encoded and @a error_handler is set to strict - - @note Binary values are serialized as object containing two keys: - - "bytes": an array of bytes as integers - - "subtype": the subtype as integer or "null" if the binary has no subtype - - @complexity Linear. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @liveexample{The following example shows the effect of different @a indent\, - @a indent_char\, and @a ensure_ascii parameters to the result of the - serialization.,dump} - - @see https://docs.python.org/2/library/json.html#json.dump - - @since version 1.0.0; indentation character @a indent_char, option - @a ensure_ascii and exceptions added in version 3.0.0; error - handlers added in version 3.4.0; serialization of binary values added - in version 3.8.0. - */ - string_t dump(const int indent = -1, - const char indent_char = ' ', - const bool ensure_ascii = false, - const error_handler_t error_handler = error_handler_t::strict) const - { - string_t result; - serializer s(detail::output_adapter(result), indent_char, error_handler); - - if (indent >= 0) - { - s.dump(*this, true, ensure_ascii, static_cast(indent)); - } - else - { - s.dump(*this, false, ensure_ascii, 0); - } - - return result; - } - - /*! - @brief return the type of the JSON value (explicit) - - Return the type of the JSON value as a value from the @ref value_t - enumeration. - - @return the type of the JSON value - Value type | return value - ------------------------- | ------------------------- - null | value_t::null - boolean | value_t::boolean - string | value_t::string - number (integer) | value_t::number_integer - number (unsigned integer) | value_t::number_unsigned - number (floating-point) | value_t::number_float - object | value_t::object - array | value_t::array - binary | value_t::binary - discarded | value_t::discarded - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `type()` for all JSON - types.,type} - - @sa @ref operator value_t() -- return the type of the JSON value (implicit) - @sa @ref type_name() -- return the type as string - - @since version 1.0.0 - */ - constexpr value_t type() const noexcept - { - return m_type; - } - - /*! - @brief return whether type is primitive - - This function returns true if and only if the JSON type is primitive - (string, number, boolean, or null). - - @return `true` if type is primitive (string, number, boolean, or null), - `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_primitive()` for all JSON - types.,is_primitive} - - @sa @ref is_structured() -- returns whether JSON value is structured - @sa @ref is_null() -- returns whether JSON value is `null` - @sa @ref is_string() -- returns whether JSON value is a string - @sa @ref is_boolean() -- returns whether JSON value is a boolean - @sa @ref is_number() -- returns whether JSON value is a number - @sa @ref is_binary() -- returns whether JSON value is a binary array - - @since version 1.0.0 - */ - constexpr bool is_primitive() const noexcept - { - return is_null() || is_string() || is_boolean() || is_number() || is_binary(); - } - - /*! - @brief return whether type is structured - - This function returns true if and only if the JSON type is structured - (array or object). - - @return `true` if type is structured (array or object), `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_structured()` for all JSON - types.,is_structured} - - @sa @ref is_primitive() -- returns whether value is primitive - @sa @ref is_array() -- returns whether value is an array - @sa @ref is_object() -- returns whether value is an object - - @since version 1.0.0 - */ - constexpr bool is_structured() const noexcept - { - return is_array() || is_object(); - } - - /*! - @brief return whether value is null - - This function returns true if and only if the JSON value is null. - - @return `true` if type is null, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_null()` for all JSON - types.,is_null} - - @since version 1.0.0 - */ - constexpr bool is_null() const noexcept - { - return m_type == value_t::null; - } - - /*! - @brief return whether value is a boolean - - This function returns true if and only if the JSON value is a boolean. - - @return `true` if type is boolean, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_boolean()` for all JSON - types.,is_boolean} - - @since version 1.0.0 - */ - constexpr bool is_boolean() const noexcept - { - return m_type == value_t::boolean; - } - - /*! - @brief return whether value is a number - - This function returns true if and only if the JSON value is a number. This - includes both integer (signed and unsigned) and floating-point values. - - @return `true` if type is number (regardless whether integer, unsigned - integer or floating-type), `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_number()` for all JSON - types.,is_number} - - @sa @ref is_number_integer() -- check if value is an integer or unsigned - integer number - @sa @ref is_number_unsigned() -- check if value is an unsigned integer - number - @sa @ref is_number_float() -- check if value is a floating-point number - - @since version 1.0.0 - */ - constexpr bool is_number() const noexcept - { - return is_number_integer() || is_number_float(); - } - - /*! - @brief return whether value is an integer number - - This function returns true if and only if the JSON value is a signed or - unsigned integer number. This excludes floating-point values. - - @return `true` if type is an integer or unsigned integer number, `false` - otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_number_integer()` for all - JSON types.,is_number_integer} - - @sa @ref is_number() -- check if value is a number - @sa @ref is_number_unsigned() -- check if value is an unsigned integer - number - @sa @ref is_number_float() -- check if value is a floating-point number - - @since version 1.0.0 - */ - constexpr bool is_number_integer() const noexcept - { - return m_type == value_t::number_integer || m_type == value_t::number_unsigned; - } - - /*! - @brief return whether value is an unsigned integer number - - This function returns true if and only if the JSON value is an unsigned - integer number. This excludes floating-point and signed integer values. - - @return `true` if type is an unsigned integer number, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_number_unsigned()` for all - JSON types.,is_number_unsigned} - - @sa @ref is_number() -- check if value is a number - @sa @ref is_number_integer() -- check if value is an integer or unsigned - integer number - @sa @ref is_number_float() -- check if value is a floating-point number - - @since version 2.0.0 - */ - constexpr bool is_number_unsigned() const noexcept - { - return m_type == value_t::number_unsigned; - } - - /*! - @brief return whether value is a floating-point number - - This function returns true if and only if the JSON value is a - floating-point number. This excludes signed and unsigned integer values. - - @return `true` if type is a floating-point number, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_number_float()` for all - JSON types.,is_number_float} - - @sa @ref is_number() -- check if value is number - @sa @ref is_number_integer() -- check if value is an integer number - @sa @ref is_number_unsigned() -- check if value is an unsigned integer - number - - @since version 1.0.0 - */ - constexpr bool is_number_float() const noexcept - { - return m_type == value_t::number_float; - } - - /*! - @brief return whether value is an object - - This function returns true if and only if the JSON value is an object. - - @return `true` if type is object, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_object()` for all JSON - types.,is_object} - - @since version 1.0.0 - */ - constexpr bool is_object() const noexcept - { - return m_type == value_t::object; - } - - /*! - @brief return whether value is an array - - This function returns true if and only if the JSON value is an array. - - @return `true` if type is array, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_array()` for all JSON - types.,is_array} - - @since version 1.0.0 - */ - constexpr bool is_array() const noexcept - { - return m_type == value_t::array; - } - - /*! - @brief return whether value is a string - - This function returns true if and only if the JSON value is a string. - - @return `true` if type is string, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_string()` for all JSON - types.,is_string} - - @since version 1.0.0 - */ - constexpr bool is_string() const noexcept - { - return m_type == value_t::string; - } - - /*! - @brief return whether value is a binary array - - This function returns true if and only if the JSON value is a binary array. - - @return `true` if type is binary array, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_binary()` for all JSON - types.,is_binary} - - @since version 3.8.0 - */ - constexpr bool is_binary() const noexcept - { - return m_type == value_t::binary; - } - - /*! - @brief return whether value is discarded - - This function returns true if and only if the JSON value was discarded - during parsing with a callback function (see @ref parser_callback_t). - - @note This function will always be `false` for JSON values after parsing. - That is, discarded values can only occur during parsing, but will be - removed when inside a structured value or replaced by null in other cases. - - @return `true` if type is discarded, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_discarded()` for all JSON - types.,is_discarded} - - @since version 1.0.0 - */ - constexpr bool is_discarded() const noexcept - { - return m_type == value_t::discarded; - } - - /*! - @brief return the type of the JSON value (implicit) - - Implicitly return the type of the JSON value as a value from the @ref - value_t enumeration. - - @return the type of the JSON value - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies the @ref value_t operator for - all JSON types.,operator__value_t} - - @sa @ref type() -- return the type of the JSON value (explicit) - @sa @ref type_name() -- return the type as string - - @since version 1.0.0 - */ - constexpr operator value_t() const noexcept - { - return m_type; - } - - /// @} - - private: - ////////////////// - // value access // - ////////////////// - - /// get a boolean (explicit) - boolean_t get_impl(boolean_t* /*unused*/) const - { - if (JSON_HEDLEY_LIKELY(is_boolean())) - { - return m_value.boolean; - } - - JSON_THROW(type_error::create(302, "type must be boolean, but is " + std::string(type_name()))); - } - - /// get a pointer to the value (object) - object_t* get_impl_ptr(object_t* /*unused*/) noexcept - { - return is_object() ? m_value.object : nullptr; - } - - /// get a pointer to the value (object) - constexpr const object_t* get_impl_ptr(const object_t* /*unused*/) const noexcept - { - return is_object() ? m_value.object : nullptr; - } - - /// get a pointer to the value (array) - array_t* get_impl_ptr(array_t* /*unused*/) noexcept - { - return is_array() ? m_value.array : nullptr; - } - - /// get a pointer to the value (array) - constexpr const array_t* get_impl_ptr(const array_t* /*unused*/) const noexcept - { - return is_array() ? m_value.array : nullptr; - } - - /// get a pointer to the value (string) - string_t* get_impl_ptr(string_t* /*unused*/) noexcept - { - return is_string() ? m_value.string : nullptr; - } - - /// get a pointer to the value (string) - constexpr const string_t* get_impl_ptr(const string_t* /*unused*/) const noexcept - { - return is_string() ? m_value.string : nullptr; - } - - /// get a pointer to the value (boolean) - boolean_t* get_impl_ptr(boolean_t* /*unused*/) noexcept - { - return is_boolean() ? &m_value.boolean : nullptr; - } - - /// get a pointer to the value (boolean) - constexpr const boolean_t* get_impl_ptr(const boolean_t* /*unused*/) const noexcept - { - return is_boolean() ? &m_value.boolean : nullptr; - } - - /// get a pointer to the value (integer number) - number_integer_t* get_impl_ptr(number_integer_t* /*unused*/) noexcept - { - return is_number_integer() ? &m_value.number_integer : nullptr; - } - - /// get a pointer to the value (integer number) - constexpr const number_integer_t* get_impl_ptr(const number_integer_t* /*unused*/) const noexcept - { - return is_number_integer() ? &m_value.number_integer : nullptr; - } - - /// get a pointer to the value (unsigned number) - number_unsigned_t* get_impl_ptr(number_unsigned_t* /*unused*/) noexcept - { - return is_number_unsigned() ? &m_value.number_unsigned : nullptr; - } - - /// get a pointer to the value (unsigned number) - constexpr const number_unsigned_t* get_impl_ptr(const number_unsigned_t* /*unused*/) const noexcept - { - return is_number_unsigned() ? &m_value.number_unsigned : nullptr; - } - - /// get a pointer to the value (floating-point number) - number_float_t* get_impl_ptr(number_float_t* /*unused*/) noexcept - { - return is_number_float() ? &m_value.number_float : nullptr; - } - - /// get a pointer to the value (floating-point number) - constexpr const number_float_t* get_impl_ptr(const number_float_t* /*unused*/) const noexcept - { - return is_number_float() ? &m_value.number_float : nullptr; - } - - /// get a pointer to the value (binary) - binary_t* get_impl_ptr(binary_t* /*unused*/) noexcept - { - return is_binary() ? m_value.binary : nullptr; - } - - /// get a pointer to the value (binary) - constexpr const binary_t* get_impl_ptr(const binary_t* /*unused*/) const noexcept - { - return is_binary() ? m_value.binary : nullptr; - } - - /*! - @brief helper function to implement get_ref() - - This function helps to implement get_ref() without code duplication for - const and non-const overloads - - @tparam ThisType will be deduced as `basic_json` or `const basic_json` - - @throw type_error.303 if ReferenceType does not match underlying value - type of the current JSON - */ - template - static ReferenceType get_ref_impl(ThisType& obj) - { - // delegate the call to get_ptr<>() - auto ptr = obj.template get_ptr::type>(); - - if (JSON_HEDLEY_LIKELY(ptr != nullptr)) - { - return *ptr; - } - - JSON_THROW(type_error::create(303, "incompatible ReferenceType for get_ref, actual type is " + std::string(obj.type_name()))); - } - - public: - /// @name value access - /// Direct access to the stored value of a JSON value. - /// @{ - - /*! - @brief get special-case overload - - This overloads avoids a lot of template boilerplate, it can be seen as the - identity method - - @tparam BasicJsonType == @ref basic_json - - @return a copy of *this - - @complexity Constant. - - @since version 2.1.0 - */ - template::type, basic_json_t>::value, - int> = 0> - basic_json get() const - { - return *this; - } - - /*! - @brief get special-case overload - - This overloads converts the current @ref basic_json in a different - @ref basic_json type - - @tparam BasicJsonType == @ref basic_json - - @return a copy of *this, converted into @tparam BasicJsonType - - @complexity Depending on the implementation of the called `from_json()` - method. - - @since version 3.2.0 - */ - template < typename BasicJsonType, detail::enable_if_t < - !std::is_same::value&& - detail::is_basic_json::value, int > = 0 > - BasicJsonType get() const - { - return *this; - } - - /*! - @brief get a value (explicit) - - Explicit type conversion between the JSON value and a compatible value - which is [CopyConstructible](https://en.cppreference.com/w/cpp/named_req/CopyConstructible) - and [DefaultConstructible](https://en.cppreference.com/w/cpp/named_req/DefaultConstructible). - The value is converted by calling the @ref json_serializer - `from_json()` method. - - The function is equivalent to executing - @code {.cpp} - ValueType ret; - JSONSerializer::from_json(*this, ret); - return ret; - @endcode - - This overloads is chosen if: - - @a ValueType is not @ref basic_json, - - @ref json_serializer has a `from_json()` method of the form - `void from_json(const basic_json&, ValueType&)`, and - - @ref json_serializer does not have a `from_json()` method of - the form `ValueType from_json(const basic_json&)` - - @tparam ValueTypeCV the provided value type - @tparam ValueType the returned value type - - @return copy of the JSON value, converted to @a ValueType - - @throw what @ref json_serializer `from_json()` method throws - - @liveexample{The example below shows several conversions from JSON values - to other types. There a few things to note: (1) Floating-point numbers can - be converted to integers\, (2) A JSON array can be converted to a standard - `std::vector`\, (3) A JSON object can be converted to C++ - associative containers such as `std::unordered_map`.,get__ValueType_const} - - @since version 2.1.0 - */ - template < typename ValueTypeCV, typename ValueType = detail::uncvref_t, - detail::enable_if_t < - !detail::is_basic_json::value && - detail::has_from_json::value && - !detail::has_non_default_from_json::value, - int > = 0 > - ValueType get() const noexcept(noexcept( - JSONSerializer::from_json(std::declval(), std::declval()))) - { - // we cannot static_assert on ValueTypeCV being non-const, because - // there is support for get(), which is why we - // still need the uncvref - static_assert(!std::is_reference::value, - "get() cannot be used with reference types, you might want to use get_ref()"); - static_assert(std::is_default_constructible::value, - "types must be DefaultConstructible when used with get()"); - - ValueType ret; - JSONSerializer::from_json(*this, ret); - return ret; - } - - /*! - @brief get a value (explicit); special case - - Explicit type conversion between the JSON value and a compatible value - which is **not** [CopyConstructible](https://en.cppreference.com/w/cpp/named_req/CopyConstructible) - and **not** [DefaultConstructible](https://en.cppreference.com/w/cpp/named_req/DefaultConstructible). - The value is converted by calling the @ref json_serializer - `from_json()` method. - - The function is equivalent to executing - @code {.cpp} - return JSONSerializer::from_json(*this); - @endcode - - This overloads is chosen if: - - @a ValueType is not @ref basic_json and - - @ref json_serializer has a `from_json()` method of the form - `ValueType from_json(const basic_json&)` - - @note If @ref json_serializer has both overloads of - `from_json()`, this one is chosen. - - @tparam ValueTypeCV the provided value type - @tparam ValueType the returned value type - - @return copy of the JSON value, converted to @a ValueType - - @throw what @ref json_serializer `from_json()` method throws - - @since version 2.1.0 - */ - template < typename ValueTypeCV, typename ValueType = detail::uncvref_t, - detail::enable_if_t < !std::is_same::value && - detail::has_non_default_from_json::value, - int > = 0 > - ValueType get() const noexcept(noexcept( - JSONSerializer::from_json(std::declval()))) - { - static_assert(!std::is_reference::value, - "get() cannot be used with reference types, you might want to use get_ref()"); - return JSONSerializer::from_json(*this); - } - - /*! - @brief get a value (explicit) - - Explicit type conversion between the JSON value and a compatible value. - The value is filled into the input parameter by calling the @ref json_serializer - `from_json()` method. - - The function is equivalent to executing - @code {.cpp} - ValueType v; - JSONSerializer::from_json(*this, v); - @endcode - - This overloads is chosen if: - - @a ValueType is not @ref basic_json, - - @ref json_serializer has a `from_json()` method of the form - `void from_json(const basic_json&, ValueType&)`, and - - @tparam ValueType the input parameter type. - - @return the input parameter, allowing chaining calls. - - @throw what @ref json_serializer `from_json()` method throws - - @liveexample{The example below shows several conversions from JSON values - to other types. There a few things to note: (1) Floating-point numbers can - be converted to integers\, (2) A JSON array can be converted to a standard - `std::vector`\, (3) A JSON object can be converted to C++ - associative containers such as `std::unordered_map`.,get_to} - - @since version 3.3.0 - */ - template < typename ValueType, - detail::enable_if_t < - !detail::is_basic_json::value&& - detail::has_from_json::value, - int > = 0 > - ValueType & get_to(ValueType& v) const noexcept(noexcept( - JSONSerializer::from_json(std::declval(), v))) - { - JSONSerializer::from_json(*this, v); - return v; - } - - // specialization to allow to call get_to with a basic_json value - // see https://github.com/nlohmann/json/issues/2175 - template::value, - int> = 0> - ValueType & get_to(ValueType& v) const - { - v = *this; - return v; - } - - template < - typename T, std::size_t N, - typename Array = T (&)[N], - detail::enable_if_t < - detail::has_from_json::value, int > = 0 > - Array get_to(T (&v)[N]) const - noexcept(noexcept(JSONSerializer::from_json( - std::declval(), v))) - { - JSONSerializer::from_json(*this, v); - return v; - } - - - /*! - @brief get a pointer value (implicit) - - Implicit pointer access to the internally stored JSON value. No copies are - made. - - @warning Writing data to the pointee of the result yields an undefined - state. - - @tparam PointerType pointer type; must be a pointer to @ref array_t, @ref - object_t, @ref string_t, @ref boolean_t, @ref number_integer_t, - @ref number_unsigned_t, or @ref number_float_t. Enforced by a static - assertion. - - @return pointer to the internally stored JSON value if the requested - pointer type @a PointerType fits to the JSON value; `nullptr` otherwise - - @complexity Constant. - - @liveexample{The example below shows how pointers to internal values of a - JSON value can be requested. Note that no type conversions are made and a - `nullptr` is returned if the value and the requested pointer type does not - match.,get_ptr} - - @since version 1.0.0 - */ - template::value, int>::type = 0> - auto get_ptr() noexcept -> decltype(std::declval().get_impl_ptr(std::declval())) - { - // delegate the call to get_impl_ptr<>() - return get_impl_ptr(static_cast(nullptr)); - } - - /*! - @brief get a pointer value (implicit) - @copydoc get_ptr() - */ - template < typename PointerType, typename std::enable_if < - std::is_pointer::value&& - std::is_const::type>::value, int >::type = 0 > - constexpr auto get_ptr() const noexcept -> decltype(std::declval().get_impl_ptr(std::declval())) - { - // delegate the call to get_impl_ptr<>() const - return get_impl_ptr(static_cast(nullptr)); - } - - /*! - @brief get a pointer value (explicit) - - Explicit pointer access to the internally stored JSON value. No copies are - made. - - @warning The pointer becomes invalid if the underlying JSON object - changes. - - @tparam PointerType pointer type; must be a pointer to @ref array_t, @ref - object_t, @ref string_t, @ref boolean_t, @ref number_integer_t, - @ref number_unsigned_t, or @ref number_float_t. - - @return pointer to the internally stored JSON value if the requested - pointer type @a PointerType fits to the JSON value; `nullptr` otherwise - - @complexity Constant. - - @liveexample{The example below shows how pointers to internal values of a - JSON value can be requested. Note that no type conversions are made and a - `nullptr` is returned if the value and the requested pointer type does not - match.,get__PointerType} - - @sa @ref get_ptr() for explicit pointer-member access - - @since version 1.0.0 - */ - template::value, int>::type = 0> - auto get() noexcept -> decltype(std::declval().template get_ptr()) - { - // delegate the call to get_ptr - return get_ptr(); - } - - /*! - @brief get a pointer value (explicit) - @copydoc get() - */ - template::value, int>::type = 0> - constexpr auto get() const noexcept -> decltype(std::declval().template get_ptr()) - { - // delegate the call to get_ptr - return get_ptr(); - } - - /*! - @brief get a reference value (implicit) - - Implicit reference access to the internally stored JSON value. No copies - are made. - - @warning Writing data to the referee of the result yields an undefined - state. - - @tparam ReferenceType reference type; must be a reference to @ref array_t, - @ref object_t, @ref string_t, @ref boolean_t, @ref number_integer_t, or - @ref number_float_t. Enforced by static assertion. - - @return reference to the internally stored JSON value if the requested - reference type @a ReferenceType fits to the JSON value; throws - type_error.303 otherwise - - @throw type_error.303 in case passed type @a ReferenceType is incompatible - with the stored JSON value; see example below - - @complexity Constant. - - @liveexample{The example shows several calls to `get_ref()`.,get_ref} - - @since version 1.1.0 - */ - template::value, int>::type = 0> - ReferenceType get_ref() - { - // delegate call to get_ref_impl - return get_ref_impl(*this); - } - - /*! - @brief get a reference value (implicit) - @copydoc get_ref() - */ - template < typename ReferenceType, typename std::enable_if < - std::is_reference::value&& - std::is_const::type>::value, int >::type = 0 > - ReferenceType get_ref() const - { - // delegate call to get_ref_impl - return get_ref_impl(*this); - } - - /*! - @brief get a value (implicit) - - Implicit type conversion between the JSON value and a compatible value. - The call is realized by calling @ref get() const. - - @tparam ValueType non-pointer type compatible to the JSON value, for - instance `int` for JSON integer numbers, `bool` for JSON booleans, or - `std::vector` types for JSON arrays. The character type of @ref string_t - as well as an initializer list of this type is excluded to avoid - ambiguities as these types implicitly convert to `std::string`. - - @return copy of the JSON value, converted to type @a ValueType - - @throw type_error.302 in case passed type @a ValueType is incompatible - to the JSON value type (e.g., the JSON value is of type boolean, but a - string is requested); see example below - - @complexity Linear in the size of the JSON value. - - @liveexample{The example below shows several conversions from JSON values - to other types. There a few things to note: (1) Floating-point numbers can - be converted to integers\, (2) A JSON array can be converted to a standard - `std::vector`\, (3) A JSON object can be converted to C++ - associative containers such as `std::unordered_map`.,operator__ValueType} - - @since version 1.0.0 - */ - template < typename ValueType, typename std::enable_if < - !std::is_pointer::value&& - !std::is_same>::value&& - !std::is_same::value&& - !detail::is_basic_json::value - && !std::is_same>::value -#if defined(JSON_HAS_CPP_17) && (defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER >= 1910 && _MSC_VER <= 1914)) - && !std::is_same::value -#endif - && detail::is_detected::value - , int >::type = 0 > - JSON_EXPLICIT operator ValueType() const - { - // delegate the call to get<>() const - return get(); - } - - /*! - @return reference to the binary value - - @throw type_error.302 if the value is not binary - - @sa @ref is_binary() to check if the value is binary - - @since version 3.8.0 - */ - binary_t& get_binary() - { - if (!is_binary()) - { - JSON_THROW(type_error::create(302, "type must be binary, but is " + std::string(type_name()))); - } - - return *get_ptr(); - } - - /// @copydoc get_binary() - const binary_t& get_binary() const - { - if (!is_binary()) - { - JSON_THROW(type_error::create(302, "type must be binary, but is " + std::string(type_name()))); - } - - return *get_ptr(); - } - - /// @} - - - //////////////////// - // element access // - //////////////////// - - /// @name element access - /// Access to the JSON value. - /// @{ - - /*! - @brief access specified array element with bounds checking - - Returns a reference to the element at specified location @a idx, with - bounds checking. - - @param[in] idx index of the element to access - - @return reference to the element at index @a idx - - @throw type_error.304 if the JSON value is not an array; in this case, - calling `at` with an index makes no sense. See example below. - @throw out_of_range.401 if the index @a idx is out of range of the array; - that is, `idx >= size()`. See example below. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Constant. - - @since version 1.0.0 - - @liveexample{The example below shows how array elements can be read and - written using `at()`. It also demonstrates the different exceptions that - can be thrown.,at__size_type} - */ - reference at(size_type idx) - { - // at only works for arrays - if (JSON_HEDLEY_LIKELY(is_array())) - { - JSON_TRY - { - return m_value.array->at(idx); - } - JSON_CATCH (std::out_of_range&) - { - // create better exception explanation - JSON_THROW(out_of_range::create(401, "array index " + std::to_string(idx) + " is out of range")); - } - } - else - { - JSON_THROW(type_error::create(304, "cannot use at() with " + std::string(type_name()))); - } - } - - /*! - @brief access specified array element with bounds checking - - Returns a const reference to the element at specified location @a idx, - with bounds checking. - - @param[in] idx index of the element to access - - @return const reference to the element at index @a idx - - @throw type_error.304 if the JSON value is not an array; in this case, - calling `at` with an index makes no sense. See example below. - @throw out_of_range.401 if the index @a idx is out of range of the array; - that is, `idx >= size()`. See example below. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Constant. - - @since version 1.0.0 - - @liveexample{The example below shows how array elements can be read using - `at()`. It also demonstrates the different exceptions that can be thrown., - at__size_type_const} - */ - const_reference at(size_type idx) const - { - // at only works for arrays - if (JSON_HEDLEY_LIKELY(is_array())) - { - JSON_TRY - { - return m_value.array->at(idx); - } - JSON_CATCH (std::out_of_range&) - { - // create better exception explanation - JSON_THROW(out_of_range::create(401, "array index " + std::to_string(idx) + " is out of range")); - } - } - else - { - JSON_THROW(type_error::create(304, "cannot use at() with " + std::string(type_name()))); - } - } - - /*! - @brief access specified object element with bounds checking - - Returns a reference to the element at with specified key @a key, with - bounds checking. - - @param[in] key key of the element to access - - @return reference to the element at key @a key - - @throw type_error.304 if the JSON value is not an object; in this case, - calling `at` with a key makes no sense. See example below. - @throw out_of_range.403 if the key @a key is is not stored in the object; - that is, `find(key) == end()`. See example below. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Logarithmic in the size of the container. - - @sa @ref operator[](const typename object_t::key_type&) for unchecked - access by reference - @sa @ref value() for access by value with a default value - - @since version 1.0.0 - - @liveexample{The example below shows how object elements can be read and - written using `at()`. It also demonstrates the different exceptions that - can be thrown.,at__object_t_key_type} - */ - reference at(const typename object_t::key_type& key) - { - // at only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - JSON_TRY - { - return m_value.object->at(key); - } - JSON_CATCH (std::out_of_range&) - { - // create better exception explanation - JSON_THROW(out_of_range::create(403, "key '" + key + "' not found")); - } - } - else - { - JSON_THROW(type_error::create(304, "cannot use at() with " + std::string(type_name()))); - } - } - - /*! - @brief access specified object element with bounds checking - - Returns a const reference to the element at with specified key @a key, - with bounds checking. - - @param[in] key key of the element to access - - @return const reference to the element at key @a key - - @throw type_error.304 if the JSON value is not an object; in this case, - calling `at` with a key makes no sense. See example below. - @throw out_of_range.403 if the key @a key is is not stored in the object; - that is, `find(key) == end()`. See example below. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Logarithmic in the size of the container. - - @sa @ref operator[](const typename object_t::key_type&) for unchecked - access by reference - @sa @ref value() for access by value with a default value - - @since version 1.0.0 - - @liveexample{The example below shows how object elements can be read using - `at()`. It also demonstrates the different exceptions that can be thrown., - at__object_t_key_type_const} - */ - const_reference at(const typename object_t::key_type& key) const - { - // at only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - JSON_TRY - { - return m_value.object->at(key); - } - JSON_CATCH (std::out_of_range&) - { - // create better exception explanation - JSON_THROW(out_of_range::create(403, "key '" + key + "' not found")); - } - } - else - { - JSON_THROW(type_error::create(304, "cannot use at() with " + std::string(type_name()))); - } - } - - /*! - @brief access specified array element - - Returns a reference to the element at specified location @a idx. - - @note If @a idx is beyond the range of the array (i.e., `idx >= size()`), - then the array is silently filled up with `null` values to make `idx` a - valid reference to the last stored element. - - @param[in] idx index of the element to access - - @return reference to the element at index @a idx - - @throw type_error.305 if the JSON value is not an array or null; in that - cases, using the [] operator with an index makes no sense. - - @complexity Constant if @a idx is in the range of the array. Otherwise - linear in `idx - size()`. - - @liveexample{The example below shows how array elements can be read and - written using `[]` operator. Note the addition of `null` - values.,operatorarray__size_type} - - @since version 1.0.0 - */ - reference operator[](size_type idx) - { - // implicitly convert null value to an empty array - if (is_null()) - { - m_type = value_t::array; - m_value.array = create(); - assert_invariant(); - } - - // operator[] only works for arrays - if (JSON_HEDLEY_LIKELY(is_array())) - { - // fill up array with null values if given idx is outside range - if (idx >= m_value.array->size()) - { - m_value.array->insert(m_value.array->end(), - idx - m_value.array->size() + 1, - basic_json()); - } - - return m_value.array->operator[](idx); - } - - JSON_THROW(type_error::create(305, "cannot use operator[] with a numeric argument with " + std::string(type_name()))); - } - - /*! - @brief access specified array element - - Returns a const reference to the element at specified location @a idx. - - @param[in] idx index of the element to access - - @return const reference to the element at index @a idx - - @throw type_error.305 if the JSON value is not an array; in that case, - using the [] operator with an index makes no sense. - - @complexity Constant. - - @liveexample{The example below shows how array elements can be read using - the `[]` operator.,operatorarray__size_type_const} - - @since version 1.0.0 - */ - const_reference operator[](size_type idx) const - { - // const operator[] only works for arrays - if (JSON_HEDLEY_LIKELY(is_array())) - { - return m_value.array->operator[](idx); - } - - JSON_THROW(type_error::create(305, "cannot use operator[] with a numeric argument with " + std::string(type_name()))); - } - - /*! - @brief access specified object element - - Returns a reference to the element at with specified key @a key. - - @note If @a key is not found in the object, then it is silently added to - the object and filled with a `null` value to make `key` a valid reference. - In case the value was `null` before, it is converted to an object. - - @param[in] key key of the element to access - - @return reference to the element at key @a key - - @throw type_error.305 if the JSON value is not an object or null; in that - cases, using the [] operator with a key makes no sense. - - @complexity Logarithmic in the size of the container. - - @liveexample{The example below shows how object elements can be read and - written using the `[]` operator.,operatorarray__key_type} - - @sa @ref at(const typename object_t::key_type&) for access by reference - with range checking - @sa @ref value() for access by value with a default value - - @since version 1.0.0 - */ - reference operator[](const typename object_t::key_type& key) - { - // implicitly convert null value to an empty object - if (is_null()) - { - m_type = value_t::object; - m_value.object = create(); - assert_invariant(); - } - - // operator[] only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - return m_value.object->operator[](key); - } - - JSON_THROW(type_error::create(305, "cannot use operator[] with a string argument with " + std::string(type_name()))); - } - - /*! - @brief read-only access specified object element - - Returns a const reference to the element at with specified key @a key. No - bounds checking is performed. - - @warning If the element with key @a key does not exist, the behavior is - undefined. - - @param[in] key key of the element to access - - @return const reference to the element at key @a key - - @pre The element with key @a key must exist. **This precondition is - enforced with an assertion.** - - @throw type_error.305 if the JSON value is not an object; in that case, - using the [] operator with a key makes no sense. - - @complexity Logarithmic in the size of the container. - - @liveexample{The example below shows how object elements can be read using - the `[]` operator.,operatorarray__key_type_const} - - @sa @ref at(const typename object_t::key_type&) for access by reference - with range checking - @sa @ref value() for access by value with a default value - - @since version 1.0.0 - */ - const_reference operator[](const typename object_t::key_type& key) const - { - // const operator[] only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - JSON_ASSERT(m_value.object->find(key) != m_value.object->end()); - return m_value.object->find(key)->second; - } - - JSON_THROW(type_error::create(305, "cannot use operator[] with a string argument with " + std::string(type_name()))); - } - - /*! - @brief access specified object element - - Returns a reference to the element at with specified key @a key. - - @note If @a key is not found in the object, then it is silently added to - the object and filled with a `null` value to make `key` a valid reference. - In case the value was `null` before, it is converted to an object. - - @param[in] key key of the element to access - - @return reference to the element at key @a key - - @throw type_error.305 if the JSON value is not an object or null; in that - cases, using the [] operator with a key makes no sense. - - @complexity Logarithmic in the size of the container. - - @liveexample{The example below shows how object elements can be read and - written using the `[]` operator.,operatorarray__key_type} - - @sa @ref at(const typename object_t::key_type&) for access by reference - with range checking - @sa @ref value() for access by value with a default value - - @since version 1.1.0 - */ - template - JSON_HEDLEY_NON_NULL(2) - reference operator[](T* key) - { - // implicitly convert null to object - if (is_null()) - { - m_type = value_t::object; - m_value = value_t::object; - assert_invariant(); - } - - // at only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - return m_value.object->operator[](key); - } - - JSON_THROW(type_error::create(305, "cannot use operator[] with a string argument with " + std::string(type_name()))); - } - - /*! - @brief read-only access specified object element - - Returns a const reference to the element at with specified key @a key. No - bounds checking is performed. - - @warning If the element with key @a key does not exist, the behavior is - undefined. - - @param[in] key key of the element to access - - @return const reference to the element at key @a key - - @pre The element with key @a key must exist. **This precondition is - enforced with an assertion.** - - @throw type_error.305 if the JSON value is not an object; in that case, - using the [] operator with a key makes no sense. - - @complexity Logarithmic in the size of the container. - - @liveexample{The example below shows how object elements can be read using - the `[]` operator.,operatorarray__key_type_const} - - @sa @ref at(const typename object_t::key_type&) for access by reference - with range checking - @sa @ref value() for access by value with a default value - - @since version 1.1.0 - */ - template - JSON_HEDLEY_NON_NULL(2) - const_reference operator[](T* key) const - { - // at only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - JSON_ASSERT(m_value.object->find(key) != m_value.object->end()); - return m_value.object->find(key)->second; - } - - JSON_THROW(type_error::create(305, "cannot use operator[] with a string argument with " + std::string(type_name()))); - } - - /*! - @brief access specified object element with default value - - Returns either a copy of an object's element at the specified key @a key - or a given default value if no element with key @a key exists. - - The function is basically equivalent to executing - @code {.cpp} - try { - return at(key); - } catch(out_of_range) { - return default_value; - } - @endcode - - @note Unlike @ref at(const typename object_t::key_type&), this function - does not throw if the given key @a key was not found. - - @note Unlike @ref operator[](const typename object_t::key_type& key), this - function does not implicitly add an element to the position defined by @a - key. This function is furthermore also applicable to const objects. - - @param[in] key key of the element to access - @param[in] default_value the value to return if @a key is not found - - @tparam ValueType type compatible to JSON values, for instance `int` for - JSON integer numbers, `bool` for JSON booleans, or `std::vector` types for - JSON arrays. Note the type of the expected value at @a key and the default - value @a default_value must be compatible. - - @return copy of the element at key @a key or @a default_value if @a key - is not found - - @throw type_error.302 if @a default_value does not match the type of the - value at @a key - @throw type_error.306 if the JSON value is not an object; in that case, - using `value()` with a key makes no sense. - - @complexity Logarithmic in the size of the container. - - @liveexample{The example below shows how object elements can be queried - with a default value.,basic_json__value} - - @sa @ref at(const typename object_t::key_type&) for access by reference - with range checking - @sa @ref operator[](const typename object_t::key_type&) for unchecked - access by reference - - @since version 1.0.0 - */ - // using std::is_convertible in a std::enable_if will fail when using explicit conversions - template < class ValueType, typename std::enable_if < - detail::is_getable::value - && !std::is_same::value, int >::type = 0 > - ValueType value(const typename object_t::key_type& key, const ValueType& default_value) const - { - // at only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - // if key is found, return value and given default value otherwise - const auto it = find(key); - if (it != end()) - { - return it->template get(); - } - - return default_value; - } - - JSON_THROW(type_error::create(306, "cannot use value() with " + std::string(type_name()))); - } - - /*! - @brief overload for a default value of type const char* - @copydoc basic_json::value(const typename object_t::key_type&, const ValueType&) const - */ - string_t value(const typename object_t::key_type& key, const char* default_value) const - { - return value(key, string_t(default_value)); - } - - /*! - @brief access specified object element via JSON Pointer with default value - - Returns either a copy of an object's element at the specified key @a key - or a given default value if no element with key @a key exists. - - The function is basically equivalent to executing - @code {.cpp} - try { - return at(ptr); - } catch(out_of_range) { - return default_value; - } - @endcode - - @note Unlike @ref at(const json_pointer&), this function does not throw - if the given key @a key was not found. - - @param[in] ptr a JSON pointer to the element to access - @param[in] default_value the value to return if @a ptr found no value - - @tparam ValueType type compatible to JSON values, for instance `int` for - JSON integer numbers, `bool` for JSON booleans, or `std::vector` types for - JSON arrays. Note the type of the expected value at @a key and the default - value @a default_value must be compatible. - - @return copy of the element at key @a key or @a default_value if @a key - is not found - - @throw type_error.302 if @a default_value does not match the type of the - value at @a ptr - @throw type_error.306 if the JSON value is not an object; in that case, - using `value()` with a key makes no sense. - - @complexity Logarithmic in the size of the container. - - @liveexample{The example below shows how object elements can be queried - with a default value.,basic_json__value_ptr} - - @sa @ref operator[](const json_pointer&) for unchecked access by reference - - @since version 2.0.2 - */ - template::value, int>::type = 0> - ValueType value(const json_pointer& ptr, const ValueType& default_value) const - { - // at only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - // if pointer resolves a value, return it or use default value - JSON_TRY - { - return ptr.get_checked(this).template get(); - } - JSON_INTERNAL_CATCH (out_of_range&) - { - return default_value; - } - } - - JSON_THROW(type_error::create(306, "cannot use value() with " + std::string(type_name()))); - } - - /*! - @brief overload for a default value of type const char* - @copydoc basic_json::value(const json_pointer&, ValueType) const - */ - JSON_HEDLEY_NON_NULL(3) - string_t value(const json_pointer& ptr, const char* default_value) const - { - return value(ptr, string_t(default_value)); - } - - /*! - @brief access the first element - - Returns a reference to the first element in the container. For a JSON - container `c`, the expression `c.front()` is equivalent to `*c.begin()`. - - @return In case of a structured type (array or object), a reference to the - first element is returned. In case of number, string, boolean, or binary - values, a reference to the value is returned. - - @complexity Constant. - - @pre The JSON value must not be `null` (would throw `std::out_of_range`) - or an empty array or object (undefined behavior, **guarded by - assertions**). - @post The JSON value remains unchanged. - - @throw invalid_iterator.214 when called on `null` value - - @liveexample{The following code shows an example for `front()`.,front} - - @sa @ref back() -- access the last element - - @since version 1.0.0 - */ - reference front() - { - return *begin(); - } - - /*! - @copydoc basic_json::front() - */ - const_reference front() const - { - return *cbegin(); - } - - /*! - @brief access the last element - - Returns a reference to the last element in the container. For a JSON - container `c`, the expression `c.back()` is equivalent to - @code {.cpp} - auto tmp = c.end(); - --tmp; - return *tmp; - @endcode - - @return In case of a structured type (array or object), a reference to the - last element is returned. In case of number, string, boolean, or binary - values, a reference to the value is returned. - - @complexity Constant. - - @pre The JSON value must not be `null` (would throw `std::out_of_range`) - or an empty array or object (undefined behavior, **guarded by - assertions**). - @post The JSON value remains unchanged. - - @throw invalid_iterator.214 when called on a `null` value. See example - below. - - @liveexample{The following code shows an example for `back()`.,back} - - @sa @ref front() -- access the first element - - @since version 1.0.0 - */ - reference back() - { - auto tmp = end(); - --tmp; - return *tmp; - } - - /*! - @copydoc basic_json::back() - */ - const_reference back() const - { - auto tmp = cend(); - --tmp; - return *tmp; - } - - /*! - @brief remove element given an iterator - - Removes the element specified by iterator @a pos. The iterator @a pos must - be valid and dereferenceable. Thus the `end()` iterator (which is valid, - but is not dereferenceable) cannot be used as a value for @a pos. - - If called on a primitive type other than `null`, the resulting JSON value - will be `null`. - - @param[in] pos iterator to the element to remove - @return Iterator following the last removed element. If the iterator @a - pos refers to the last element, the `end()` iterator is returned. - - @tparam IteratorType an @ref iterator or @ref const_iterator - - @post Invalidates iterators and references at or after the point of the - erase, including the `end()` iterator. - - @throw type_error.307 if called on a `null` value; example: `"cannot use - erase() with null"` - @throw invalid_iterator.202 if called on an iterator which does not belong - to the current JSON value; example: `"iterator does not fit current - value"` - @throw invalid_iterator.205 if called on a primitive type with invalid - iterator (i.e., any iterator which is not `begin()`); example: `"iterator - out of range"` - - @complexity The complexity depends on the type: - - objects: amortized constant - - arrays: linear in distance between @a pos and the end of the container - - strings and binary: linear in the length of the member - - other types: constant - - @liveexample{The example shows the result of `erase()` for different JSON - types.,erase__IteratorType} - - @sa @ref erase(IteratorType, IteratorType) -- removes the elements in - the given range - @sa @ref erase(const typename object_t::key_type&) -- removes the element - from an object at the given key - @sa @ref erase(const size_type) -- removes the element from an array at - the given index - - @since version 1.0.0 - */ - template < class IteratorType, typename std::enable_if < - std::is_same::value || - std::is_same::value, int >::type - = 0 > - IteratorType erase(IteratorType pos) - { - // make sure iterator fits the current value - if (JSON_HEDLEY_UNLIKELY(this != pos.m_object)) - { - JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); - } - - IteratorType result = end(); - - switch (m_type) - { - case value_t::boolean: - case value_t::number_float: - case value_t::number_integer: - case value_t::number_unsigned: - case value_t::string: - case value_t::binary: - { - if (JSON_HEDLEY_UNLIKELY(!pos.m_it.primitive_iterator.is_begin())) - { - JSON_THROW(invalid_iterator::create(205, "iterator out of range")); - } - - if (is_string()) - { - AllocatorType alloc; - std::allocator_traits::destroy(alloc, m_value.string); - std::allocator_traits::deallocate(alloc, m_value.string, 1); - m_value.string = nullptr; - } - else if (is_binary()) - { - AllocatorType alloc; - std::allocator_traits::destroy(alloc, m_value.binary); - std::allocator_traits::deallocate(alloc, m_value.binary, 1); - m_value.binary = nullptr; - } - - m_type = value_t::null; - assert_invariant(); - break; - } - - case value_t::object: - { - result.m_it.object_iterator = m_value.object->erase(pos.m_it.object_iterator); - break; - } - - case value_t::array: - { - result.m_it.array_iterator = m_value.array->erase(pos.m_it.array_iterator); - break; - } - - default: - JSON_THROW(type_error::create(307, "cannot use erase() with " + std::string(type_name()))); - } - - return result; - } - - /*! - @brief remove elements given an iterator range - - Removes the element specified by the range `[first; last)`. The iterator - @a first does not need to be dereferenceable if `first == last`: erasing - an empty range is a no-op. - - If called on a primitive type other than `null`, the resulting JSON value - will be `null`. - - @param[in] first iterator to the beginning of the range to remove - @param[in] last iterator past the end of the range to remove - @return Iterator following the last removed element. If the iterator @a - second refers to the last element, the `end()` iterator is returned. - - @tparam IteratorType an @ref iterator or @ref const_iterator - - @post Invalidates iterators and references at or after the point of the - erase, including the `end()` iterator. - - @throw type_error.307 if called on a `null` value; example: `"cannot use - erase() with null"` - @throw invalid_iterator.203 if called on iterators which does not belong - to the current JSON value; example: `"iterators do not fit current value"` - @throw invalid_iterator.204 if called on a primitive type with invalid - iterators (i.e., if `first != begin()` and `last != end()`); example: - `"iterators out of range"` - - @complexity The complexity depends on the type: - - objects: `log(size()) + std::distance(first, last)` - - arrays: linear in the distance between @a first and @a last, plus linear - in the distance between @a last and end of the container - - strings and binary: linear in the length of the member - - other types: constant - - @liveexample{The example shows the result of `erase()` for different JSON - types.,erase__IteratorType_IteratorType} - - @sa @ref erase(IteratorType) -- removes the element at a given position - @sa @ref erase(const typename object_t::key_type&) -- removes the element - from an object at the given key - @sa @ref erase(const size_type) -- removes the element from an array at - the given index - - @since version 1.0.0 - */ - template < class IteratorType, typename std::enable_if < - std::is_same::value || - std::is_same::value, int >::type - = 0 > - IteratorType erase(IteratorType first, IteratorType last) - { - // make sure iterator fits the current value - if (JSON_HEDLEY_UNLIKELY(this != first.m_object || this != last.m_object)) - { - JSON_THROW(invalid_iterator::create(203, "iterators do not fit current value")); - } - - IteratorType result = end(); - - switch (m_type) - { - case value_t::boolean: - case value_t::number_float: - case value_t::number_integer: - case value_t::number_unsigned: - case value_t::string: - case value_t::binary: - { - if (JSON_HEDLEY_LIKELY(!first.m_it.primitive_iterator.is_begin() - || !last.m_it.primitive_iterator.is_end())) - { - JSON_THROW(invalid_iterator::create(204, "iterators out of range")); - } - - if (is_string()) - { - AllocatorType alloc; - std::allocator_traits::destroy(alloc, m_value.string); - std::allocator_traits::deallocate(alloc, m_value.string, 1); - m_value.string = nullptr; - } - else if (is_binary()) - { - AllocatorType alloc; - std::allocator_traits::destroy(alloc, m_value.binary); - std::allocator_traits::deallocate(alloc, m_value.binary, 1); - m_value.binary = nullptr; - } - - m_type = value_t::null; - assert_invariant(); - break; - } - - case value_t::object: - { - result.m_it.object_iterator = m_value.object->erase(first.m_it.object_iterator, - last.m_it.object_iterator); - break; - } - - case value_t::array: - { - result.m_it.array_iterator = m_value.array->erase(first.m_it.array_iterator, - last.m_it.array_iterator); - break; - } - - default: - JSON_THROW(type_error::create(307, "cannot use erase() with " + std::string(type_name()))); - } - - return result; - } - - /*! - @brief remove element from a JSON object given a key - - Removes elements from a JSON object with the key value @a key. - - @param[in] key value of the elements to remove - - @return Number of elements removed. If @a ObjectType is the default - `std::map` type, the return value will always be `0` (@a key was not - found) or `1` (@a key was found). - - @post References and iterators to the erased elements are invalidated. - Other references and iterators are not affected. - - @throw type_error.307 when called on a type other than JSON object; - example: `"cannot use erase() with null"` - - @complexity `log(size()) + count(key)` - - @liveexample{The example shows the effect of `erase()`.,erase__key_type} - - @sa @ref erase(IteratorType) -- removes the element at a given position - @sa @ref erase(IteratorType, IteratorType) -- removes the elements in - the given range - @sa @ref erase(const size_type) -- removes the element from an array at - the given index - - @since version 1.0.0 - */ - size_type erase(const typename object_t::key_type& key) - { - // this erase only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - return m_value.object->erase(key); - } - - JSON_THROW(type_error::create(307, "cannot use erase() with " + std::string(type_name()))); - } - - /*! - @brief remove element from a JSON array given an index - - Removes element from a JSON array at the index @a idx. - - @param[in] idx index of the element to remove - - @throw type_error.307 when called on a type other than JSON object; - example: `"cannot use erase() with null"` - @throw out_of_range.401 when `idx >= size()`; example: `"array index 17 - is out of range"` - - @complexity Linear in distance between @a idx and the end of the container. - - @liveexample{The example shows the effect of `erase()`.,erase__size_type} - - @sa @ref erase(IteratorType) -- removes the element at a given position - @sa @ref erase(IteratorType, IteratorType) -- removes the elements in - the given range - @sa @ref erase(const typename object_t::key_type&) -- removes the element - from an object at the given key - - @since version 1.0.0 - */ - void erase(const size_type idx) - { - // this erase only works for arrays - if (JSON_HEDLEY_LIKELY(is_array())) - { - if (JSON_HEDLEY_UNLIKELY(idx >= size())) - { - JSON_THROW(out_of_range::create(401, "array index " + std::to_string(idx) + " is out of range")); - } - - m_value.array->erase(m_value.array->begin() + static_cast(idx)); - } - else - { - JSON_THROW(type_error::create(307, "cannot use erase() with " + std::string(type_name()))); - } - } - - /// @} - - - //////////// - // lookup // - //////////// - - /// @name lookup - /// @{ - - /*! - @brief find an element in a JSON object - - Finds an element in a JSON object with key equivalent to @a key. If the - element is not found or the JSON value is not an object, end() is - returned. - - @note This method always returns @ref end() when executed on a JSON type - that is not an object. - - @param[in] key key value of the element to search for. - - @return Iterator to an element with key equivalent to @a key. If no such - element is found or the JSON value is not an object, past-the-end (see - @ref end()) iterator is returned. - - @complexity Logarithmic in the size of the JSON object. - - @liveexample{The example shows how `find()` is used.,find__key_type} - - @sa @ref contains(KeyT&&) const -- checks whether a key exists - - @since version 1.0.0 - */ - template - iterator find(KeyT&& key) - { - auto result = end(); - - if (is_object()) - { - result.m_it.object_iterator = m_value.object->find(std::forward(key)); - } - - return result; - } - - /*! - @brief find an element in a JSON object - @copydoc find(KeyT&&) - */ - template - const_iterator find(KeyT&& key) const - { - auto result = cend(); - - if (is_object()) - { - result.m_it.object_iterator = m_value.object->find(std::forward(key)); - } - - return result; - } - - /*! - @brief returns the number of occurrences of a key in a JSON object - - Returns the number of elements with key @a key. If ObjectType is the - default `std::map` type, the return value will always be `0` (@a key was - not found) or `1` (@a key was found). - - @note This method always returns `0` when executed on a JSON type that is - not an object. - - @param[in] key key value of the element to count - - @return Number of elements with key @a key. If the JSON value is not an - object, the return value will be `0`. - - @complexity Logarithmic in the size of the JSON object. - - @liveexample{The example shows how `count()` is used.,count} - - @since version 1.0.0 - */ - template - size_type count(KeyT&& key) const - { - // return 0 for all nonobject types - return is_object() ? m_value.object->count(std::forward(key)) : 0; - } - - /*! - @brief check the existence of an element in a JSON object - - Check whether an element exists in a JSON object with key equivalent to - @a key. If the element is not found or the JSON value is not an object, - false is returned. - - @note This method always returns false when executed on a JSON type - that is not an object. - - @param[in] key key value to check its existence. - - @return true if an element with specified @a key exists. If no such - element with such key is found or the JSON value is not an object, - false is returned. - - @complexity Logarithmic in the size of the JSON object. - - @liveexample{The following code shows an example for `contains()`.,contains} - - @sa @ref find(KeyT&&) -- returns an iterator to an object element - @sa @ref contains(const json_pointer&) const -- checks the existence for a JSON pointer - - @since version 3.6.0 - */ - template < typename KeyT, typename std::enable_if < - !std::is_same::type, json_pointer>::value, int >::type = 0 > - bool contains(KeyT && key) const - { - return is_object() && m_value.object->find(std::forward(key)) != m_value.object->end(); - } - - /*! - @brief check the existence of an element in a JSON object given a JSON pointer - - Check whether the given JSON pointer @a ptr can be resolved in the current - JSON value. - - @note This method can be executed on any JSON value type. - - @param[in] ptr JSON pointer to check its existence. - - @return true if the JSON pointer can be resolved to a stored value, false - otherwise. - - @post If `j.contains(ptr)` returns true, it is safe to call `j[ptr]`. - - @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - - @complexity Logarithmic in the size of the JSON object. - - @liveexample{The following code shows an example for `contains()`.,contains_json_pointer} - - @sa @ref contains(KeyT &&) const -- checks the existence of a key - - @since version 3.7.0 - */ - bool contains(const json_pointer& ptr) const - { - return ptr.contains(this); - } - - /// @} - - - /////////////// - // iterators // - /////////////// - - /// @name iterators - /// @{ - - /*! - @brief returns an iterator to the first element - - Returns an iterator to the first element. - - @image html range-begin-end.svg "Illustration from cppreference.com" - - @return iterator to the first element - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is constant. - - @liveexample{The following code shows an example for `begin()`.,begin} - - @sa @ref cbegin() -- returns a const iterator to the beginning - @sa @ref end() -- returns an iterator to the end - @sa @ref cend() -- returns a const iterator to the end - - @since version 1.0.0 - */ - iterator begin() noexcept - { - iterator result(this); - result.set_begin(); - return result; - } - - /*! - @copydoc basic_json::cbegin() - */ - const_iterator begin() const noexcept - { - return cbegin(); - } - - /*! - @brief returns a const iterator to the first element - - Returns a const iterator to the first element. - - @image html range-begin-end.svg "Illustration from cppreference.com" - - @return const iterator to the first element - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is constant. - - Has the semantics of `const_cast(*this).begin()`. - - @liveexample{The following code shows an example for `cbegin()`.,cbegin} - - @sa @ref begin() -- returns an iterator to the beginning - @sa @ref end() -- returns an iterator to the end - @sa @ref cend() -- returns a const iterator to the end - - @since version 1.0.0 - */ - const_iterator cbegin() const noexcept - { - const_iterator result(this); - result.set_begin(); - return result; - } - - /*! - @brief returns an iterator to one past the last element - - Returns an iterator to one past the last element. - - @image html range-begin-end.svg "Illustration from cppreference.com" - - @return iterator one past the last element - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is constant. - - @liveexample{The following code shows an example for `end()`.,end} - - @sa @ref cend() -- returns a const iterator to the end - @sa @ref begin() -- returns an iterator to the beginning - @sa @ref cbegin() -- returns a const iterator to the beginning - - @since version 1.0.0 - */ - iterator end() noexcept - { - iterator result(this); - result.set_end(); - return result; - } - - /*! - @copydoc basic_json::cend() - */ - const_iterator end() const noexcept - { - return cend(); - } - - /*! - @brief returns a const iterator to one past the last element - - Returns a const iterator to one past the last element. - - @image html range-begin-end.svg "Illustration from cppreference.com" - - @return const iterator one past the last element - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is constant. - - Has the semantics of `const_cast(*this).end()`. - - @liveexample{The following code shows an example for `cend()`.,cend} - - @sa @ref end() -- returns an iterator to the end - @sa @ref begin() -- returns an iterator to the beginning - @sa @ref cbegin() -- returns a const iterator to the beginning - - @since version 1.0.0 - */ - const_iterator cend() const noexcept - { - const_iterator result(this); - result.set_end(); - return result; - } - - /*! - @brief returns an iterator to the reverse-beginning - - Returns an iterator to the reverse-beginning; that is, the last element. - - @image html range-rbegin-rend.svg "Illustration from cppreference.com" - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [ReversibleContainer](https://en.cppreference.com/w/cpp/named_req/ReversibleContainer) - requirements: - - The complexity is constant. - - Has the semantics of `reverse_iterator(end())`. - - @liveexample{The following code shows an example for `rbegin()`.,rbegin} - - @sa @ref crbegin() -- returns a const reverse iterator to the beginning - @sa @ref rend() -- returns a reverse iterator to the end - @sa @ref crend() -- returns a const reverse iterator to the end - - @since version 1.0.0 - */ - reverse_iterator rbegin() noexcept - { - return reverse_iterator(end()); - } - - /*! - @copydoc basic_json::crbegin() - */ - const_reverse_iterator rbegin() const noexcept - { - return crbegin(); - } - - /*! - @brief returns an iterator to the reverse-end - - Returns an iterator to the reverse-end; that is, one before the first - element. - - @image html range-rbegin-rend.svg "Illustration from cppreference.com" - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [ReversibleContainer](https://en.cppreference.com/w/cpp/named_req/ReversibleContainer) - requirements: - - The complexity is constant. - - Has the semantics of `reverse_iterator(begin())`. - - @liveexample{The following code shows an example for `rend()`.,rend} - - @sa @ref crend() -- returns a const reverse iterator to the end - @sa @ref rbegin() -- returns a reverse iterator to the beginning - @sa @ref crbegin() -- returns a const reverse iterator to the beginning - - @since version 1.0.0 - */ - reverse_iterator rend() noexcept - { - return reverse_iterator(begin()); - } - - /*! - @copydoc basic_json::crend() - */ - const_reverse_iterator rend() const noexcept - { - return crend(); - } - - /*! - @brief returns a const reverse iterator to the last element - - Returns a const iterator to the reverse-beginning; that is, the last - element. - - @image html range-rbegin-rend.svg "Illustration from cppreference.com" - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [ReversibleContainer](https://en.cppreference.com/w/cpp/named_req/ReversibleContainer) - requirements: - - The complexity is constant. - - Has the semantics of `const_cast(*this).rbegin()`. - - @liveexample{The following code shows an example for `crbegin()`.,crbegin} - - @sa @ref rbegin() -- returns a reverse iterator to the beginning - @sa @ref rend() -- returns a reverse iterator to the end - @sa @ref crend() -- returns a const reverse iterator to the end - - @since version 1.0.0 - */ - const_reverse_iterator crbegin() const noexcept - { - return const_reverse_iterator(cend()); - } - - /*! - @brief returns a const reverse iterator to one before the first - - Returns a const reverse iterator to the reverse-end; that is, one before - the first element. - - @image html range-rbegin-rend.svg "Illustration from cppreference.com" - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [ReversibleContainer](https://en.cppreference.com/w/cpp/named_req/ReversibleContainer) - requirements: - - The complexity is constant. - - Has the semantics of `const_cast(*this).rend()`. - - @liveexample{The following code shows an example for `crend()`.,crend} - - @sa @ref rend() -- returns a reverse iterator to the end - @sa @ref rbegin() -- returns a reverse iterator to the beginning - @sa @ref crbegin() -- returns a const reverse iterator to the beginning - - @since version 1.0.0 - */ - const_reverse_iterator crend() const noexcept - { - return const_reverse_iterator(cbegin()); - } - - public: - /*! - @brief wrapper to access iterator member functions in range-based for - - This function allows to access @ref iterator::key() and @ref - iterator::value() during range-based for loops. In these loops, a - reference to the JSON values is returned, so there is no access to the - underlying iterator. - - For loop without iterator_wrapper: - - @code{cpp} - for (auto it = j_object.begin(); it != j_object.end(); ++it) - { - std::cout << "key: " << it.key() << ", value:" << it.value() << '\n'; - } - @endcode - - Range-based for loop without iterator proxy: - - @code{cpp} - for (auto it : j_object) - { - // "it" is of type json::reference and has no key() member - std::cout << "value: " << it << '\n'; - } - @endcode - - Range-based for loop with iterator proxy: - - @code{cpp} - for (auto it : json::iterator_wrapper(j_object)) - { - std::cout << "key: " << it.key() << ", value:" << it.value() << '\n'; - } - @endcode - - @note When iterating over an array, `key()` will return the index of the - element as string (see example). - - @param[in] ref reference to a JSON value - @return iteration proxy object wrapping @a ref with an interface to use in - range-based for loops - - @liveexample{The following code shows how the wrapper is used,iterator_wrapper} - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Constant. - - @note The name of this function is not yet final and may change in the - future. - - @deprecated This stream operator is deprecated and will be removed in - future 4.0.0 of the library. Please use @ref items() instead; - that is, replace `json::iterator_wrapper(j)` with `j.items()`. - */ - JSON_HEDLEY_DEPRECATED_FOR(3.1.0, items()) - static iteration_proxy iterator_wrapper(reference ref) noexcept - { - return ref.items(); - } - - /*! - @copydoc iterator_wrapper(reference) - */ - JSON_HEDLEY_DEPRECATED_FOR(3.1.0, items()) - static iteration_proxy iterator_wrapper(const_reference ref) noexcept - { - return ref.items(); - } - - /*! - @brief helper to access iterator member functions in range-based for - - This function allows to access @ref iterator::key() and @ref - iterator::value() during range-based for loops. In these loops, a - reference to the JSON values is returned, so there is no access to the - underlying iterator. - - For loop without `items()` function: - - @code{cpp} - for (auto it = j_object.begin(); it != j_object.end(); ++it) - { - std::cout << "key: " << it.key() << ", value:" << it.value() << '\n'; - } - @endcode - - Range-based for loop without `items()` function: - - @code{cpp} - for (auto it : j_object) - { - // "it" is of type json::reference and has no key() member - std::cout << "value: " << it << '\n'; - } - @endcode - - Range-based for loop with `items()` function: - - @code{cpp} - for (auto& el : j_object.items()) - { - std::cout << "key: " << el.key() << ", value:" << el.value() << '\n'; - } - @endcode - - The `items()` function also allows to use - [structured bindings](https://en.cppreference.com/w/cpp/language/structured_binding) - (C++17): - - @code{cpp} - for (auto& [key, val] : j_object.items()) - { - std::cout << "key: " << key << ", value:" << val << '\n'; - } - @endcode - - @note When iterating over an array, `key()` will return the index of the - element as string (see example). For primitive types (e.g., numbers), - `key()` returns an empty string. - - @warning Using `items()` on temporary objects is dangerous. Make sure the - object's lifetime exeeds the iteration. See - for more - information. - - @return iteration proxy object wrapping @a ref with an interface to use in - range-based for loops - - @liveexample{The following code shows how the function is used.,items} - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Constant. - - @since version 3.1.0, structured bindings support since 3.5.0. - */ - iteration_proxy items() noexcept - { - return iteration_proxy(*this); - } - - /*! - @copydoc items() - */ - iteration_proxy items() const noexcept - { - return iteration_proxy(*this); - } - - /// @} - - - ////////////// - // capacity // - ////////////// - - /// @name capacity - /// @{ - - /*! - @brief checks whether the container is empty. - - Checks if a JSON value has no elements (i.e. whether its @ref size is `0`). - - @return The return value depends on the different types and is - defined as follows: - Value type | return value - ----------- | ------------- - null | `true` - boolean | `false` - string | `false` - number | `false` - binary | `false` - object | result of function `object_t::empty()` - array | result of function `array_t::empty()` - - @liveexample{The following code uses `empty()` to check if a JSON - object contains any elements.,empty} - - @complexity Constant, as long as @ref array_t and @ref object_t satisfy - the Container concept; that is, their `empty()` functions have constant - complexity. - - @iterators No changes. - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @note This function does not return whether a string stored as JSON value - is empty - it returns whether the JSON container itself is empty which is - false in the case of a string. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is constant. - - Has the semantics of `begin() == end()`. - - @sa @ref size() -- returns the number of elements - - @since version 1.0.0 - */ - bool empty() const noexcept - { - switch (m_type) - { - case value_t::null: - { - // null values are empty - return true; - } - - case value_t::array: - { - // delegate call to array_t::empty() - return m_value.array->empty(); - } - - case value_t::object: - { - // delegate call to object_t::empty() - return m_value.object->empty(); - } - - default: - { - // all other types are nonempty - return false; - } - } - } - - /*! - @brief returns the number of elements - - Returns the number of elements in a JSON value. - - @return The return value depends on the different types and is - defined as follows: - Value type | return value - ----------- | ------------- - null | `0` - boolean | `1` - string | `1` - number | `1` - binary | `1` - object | result of function object_t::size() - array | result of function array_t::size() - - @liveexample{The following code calls `size()` on the different value - types.,size} - - @complexity Constant, as long as @ref array_t and @ref object_t satisfy - the Container concept; that is, their size() functions have constant - complexity. - - @iterators No changes. - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @note This function does not return the length of a string stored as JSON - value - it returns the number of elements in the JSON value which is 1 in - the case of a string. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is constant. - - Has the semantics of `std::distance(begin(), end())`. - - @sa @ref empty() -- checks whether the container is empty - @sa @ref max_size() -- returns the maximal number of elements - - @since version 1.0.0 - */ - size_type size() const noexcept - { - switch (m_type) - { - case value_t::null: - { - // null values are empty - return 0; - } - - case value_t::array: - { - // delegate call to array_t::size() - return m_value.array->size(); - } - - case value_t::object: - { - // delegate call to object_t::size() - return m_value.object->size(); - } - - default: - { - // all other types have size 1 - return 1; - } - } - } - - /*! - @brief returns the maximum possible number of elements - - Returns the maximum number of elements a JSON value is able to hold due to - system or library implementation limitations, i.e. `std::distance(begin(), - end())` for the JSON value. - - @return The return value depends on the different types and is - defined as follows: - Value type | return value - ----------- | ------------- - null | `0` (same as `size()`) - boolean | `1` (same as `size()`) - string | `1` (same as `size()`) - number | `1` (same as `size()`) - binary | `1` (same as `size()`) - object | result of function `object_t::max_size()` - array | result of function `array_t::max_size()` - - @liveexample{The following code calls `max_size()` on the different value - types. Note the output is implementation specific.,max_size} - - @complexity Constant, as long as @ref array_t and @ref object_t satisfy - the Container concept; that is, their `max_size()` functions have constant - complexity. - - @iterators No changes. - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is constant. - - Has the semantics of returning `b.size()` where `b` is the largest - possible JSON value. - - @sa @ref size() -- returns the number of elements - - @since version 1.0.0 - */ - size_type max_size() const noexcept - { - switch (m_type) - { - case value_t::array: - { - // delegate call to array_t::max_size() - return m_value.array->max_size(); - } - - case value_t::object: - { - // delegate call to object_t::max_size() - return m_value.object->max_size(); - } - - default: - { - // all other types have max_size() == size() - return size(); - } - } - } - - /// @} - - - /////////////// - // modifiers // - /////////////// - - /// @name modifiers - /// @{ - - /*! - @brief clears the contents - - Clears the content of a JSON value and resets it to the default value as - if @ref basic_json(value_t) would have been called with the current value - type from @ref type(): - - Value type | initial value - ----------- | ------------- - null | `null` - boolean | `false` - string | `""` - number | `0` - binary | An empty byte vector - object | `{}` - array | `[]` - - @post Has the same effect as calling - @code {.cpp} - *this = basic_json(type()); - @endcode - - @liveexample{The example below shows the effect of `clear()` to different - JSON types.,clear} - - @complexity Linear in the size of the JSON value. - - @iterators All iterators, pointers and references related to this container - are invalidated. - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @sa @ref basic_json(value_t) -- constructor that creates an object with the - same value than calling `clear()` - - @since version 1.0.0 - */ - void clear() noexcept - { - switch (m_type) - { - case value_t::number_integer: - { - m_value.number_integer = 0; - break; - } - - case value_t::number_unsigned: - { - m_value.number_unsigned = 0; - break; - } - - case value_t::number_float: - { - m_value.number_float = 0.0; - break; - } - - case value_t::boolean: - { - m_value.boolean = false; - break; - } - - case value_t::string: - { - m_value.string->clear(); - break; - } - - case value_t::binary: - { - m_value.binary->clear(); - break; - } - - case value_t::array: - { - m_value.array->clear(); - break; - } - - case value_t::object: - { - m_value.object->clear(); - break; - } - - default: - break; - } - } - - /*! - @brief add an object to an array - - Appends the given element @a val to the end of the JSON value. If the - function is called on a JSON null value, an empty array is created before - appending @a val. - - @param[in] val the value to add to the JSON array - - @throw type_error.308 when called on a type other than JSON array or - null; example: `"cannot use push_back() with number"` - - @complexity Amortized constant. - - @liveexample{The example shows how `push_back()` and `+=` can be used to - add elements to a JSON array. Note how the `null` value was silently - converted to a JSON array.,push_back} - - @since version 1.0.0 - */ - void push_back(basic_json&& val) - { - // push_back only works for null objects or arrays - if (JSON_HEDLEY_UNLIKELY(!(is_null() || is_array()))) - { - JSON_THROW(type_error::create(308, "cannot use push_back() with " + std::string(type_name()))); - } - - // transform null object into an array - if (is_null()) - { - m_type = value_t::array; - m_value = value_t::array; - assert_invariant(); - } - - // add element to array (move semantics) - m_value.array->push_back(std::move(val)); - // if val is moved from, basic_json move constructor marks it null so we do not call the destructor - } - - /*! - @brief add an object to an array - @copydoc push_back(basic_json&&) - */ - reference operator+=(basic_json&& val) - { - push_back(std::move(val)); - return *this; - } - - /*! - @brief add an object to an array - @copydoc push_back(basic_json&&) - */ - void push_back(const basic_json& val) - { - // push_back only works for null objects or arrays - if (JSON_HEDLEY_UNLIKELY(!(is_null() || is_array()))) - { - JSON_THROW(type_error::create(308, "cannot use push_back() with " + std::string(type_name()))); - } - - // transform null object into an array - if (is_null()) - { - m_type = value_t::array; - m_value = value_t::array; - assert_invariant(); - } - - // add element to array - m_value.array->push_back(val); - } - - /*! - @brief add an object to an array - @copydoc push_back(basic_json&&) - */ - reference operator+=(const basic_json& val) - { - push_back(val); - return *this; - } - - /*! - @brief add an object to an object - - Inserts the given element @a val to the JSON object. If the function is - called on a JSON null value, an empty object is created before inserting - @a val. - - @param[in] val the value to add to the JSON object - - @throw type_error.308 when called on a type other than JSON object or - null; example: `"cannot use push_back() with number"` - - @complexity Logarithmic in the size of the container, O(log(`size()`)). - - @liveexample{The example shows how `push_back()` and `+=` can be used to - add elements to a JSON object. Note how the `null` value was silently - converted to a JSON object.,push_back__object_t__value} - - @since version 1.0.0 - */ - void push_back(const typename object_t::value_type& val) - { - // push_back only works for null objects or objects - if (JSON_HEDLEY_UNLIKELY(!(is_null() || is_object()))) - { - JSON_THROW(type_error::create(308, "cannot use push_back() with " + std::string(type_name()))); - } - - // transform null object into an object - if (is_null()) - { - m_type = value_t::object; - m_value = value_t::object; - assert_invariant(); - } - - // add element to array - m_value.object->insert(val); - } - - /*! - @brief add an object to an object - @copydoc push_back(const typename object_t::value_type&) - */ - reference operator+=(const typename object_t::value_type& val) - { - push_back(val); - return *this; - } - - /*! - @brief add an object to an object - - This function allows to use `push_back` with an initializer list. In case - - 1. the current value is an object, - 2. the initializer list @a init contains only two elements, and - 3. the first element of @a init is a string, - - @a init is converted into an object element and added using - @ref push_back(const typename object_t::value_type&). Otherwise, @a init - is converted to a JSON value and added using @ref push_back(basic_json&&). - - @param[in] init an initializer list - - @complexity Linear in the size of the initializer list @a init. - - @note This function is required to resolve an ambiguous overload error, - because pairs like `{"key", "value"}` can be both interpreted as - `object_t::value_type` or `std::initializer_list`, see - https://github.com/nlohmann/json/issues/235 for more information. - - @liveexample{The example shows how initializer lists are treated as - objects when possible.,push_back__initializer_list} - */ - void push_back(initializer_list_t init) - { - if (is_object() && init.size() == 2 && (*init.begin())->is_string()) - { - basic_json&& key = init.begin()->moved_or_copied(); - push_back(typename object_t::value_type( - std::move(key.get_ref()), (init.begin() + 1)->moved_or_copied())); - } - else - { - push_back(basic_json(init)); - } - } - - /*! - @brief add an object to an object - @copydoc push_back(initializer_list_t) - */ - reference operator+=(initializer_list_t init) - { - push_back(init); - return *this; - } - - /*! - @brief add an object to an array - - Creates a JSON value from the passed parameters @a args to the end of the - JSON value. If the function is called on a JSON null value, an empty array - is created before appending the value created from @a args. - - @param[in] args arguments to forward to a constructor of @ref basic_json - @tparam Args compatible types to create a @ref basic_json object - - @return reference to the inserted element - - @throw type_error.311 when called on a type other than JSON array or - null; example: `"cannot use emplace_back() with number"` - - @complexity Amortized constant. - - @liveexample{The example shows how `push_back()` can be used to add - elements to a JSON array. Note how the `null` value was silently converted - to a JSON array.,emplace_back} - - @since version 2.0.8, returns reference since 3.7.0 - */ - template - reference emplace_back(Args&& ... args) - { - // emplace_back only works for null objects or arrays - if (JSON_HEDLEY_UNLIKELY(!(is_null() || is_array()))) - { - JSON_THROW(type_error::create(311, "cannot use emplace_back() with " + std::string(type_name()))); - } - - // transform null object into an array - if (is_null()) - { - m_type = value_t::array; - m_value = value_t::array; - assert_invariant(); - } - - // add element to array (perfect forwarding) -#ifdef JSON_HAS_CPP_17 - return m_value.array->emplace_back(std::forward(args)...); -#else - m_value.array->emplace_back(std::forward(args)...); - return m_value.array->back(); -#endif - } - - /*! - @brief add an object to an object if key does not exist - - Inserts a new element into a JSON object constructed in-place with the - given @a args if there is no element with the key in the container. If the - function is called on a JSON null value, an empty object is created before - appending the value created from @a args. - - @param[in] args arguments to forward to a constructor of @ref basic_json - @tparam Args compatible types to create a @ref basic_json object - - @return a pair consisting of an iterator to the inserted element, or the - already-existing element if no insertion happened, and a bool - denoting whether the insertion took place. - - @throw type_error.311 when called on a type other than JSON object or - null; example: `"cannot use emplace() with number"` - - @complexity Logarithmic in the size of the container, O(log(`size()`)). - - @liveexample{The example shows how `emplace()` can be used to add elements - to a JSON object. Note how the `null` value was silently converted to a - JSON object. Further note how no value is added if there was already one - value stored with the same key.,emplace} - - @since version 2.0.8 - */ - template - std::pair emplace(Args&& ... args) - { - // emplace only works for null objects or arrays - if (JSON_HEDLEY_UNLIKELY(!(is_null() || is_object()))) - { - JSON_THROW(type_error::create(311, "cannot use emplace() with " + std::string(type_name()))); - } - - // transform null object into an object - if (is_null()) - { - m_type = value_t::object; - m_value = value_t::object; - assert_invariant(); - } - - // add element to array (perfect forwarding) - auto res = m_value.object->emplace(std::forward(args)...); - // create result iterator and set iterator to the result of emplace - auto it = begin(); - it.m_it.object_iterator = res.first; - - // return pair of iterator and boolean - return {it, res.second}; - } - - /// Helper for insertion of an iterator - /// @note: This uses std::distance to support GCC 4.8, - /// see https://github.com/nlohmann/json/pull/1257 - template - iterator insert_iterator(const_iterator pos, Args&& ... args) - { - iterator result(this); - JSON_ASSERT(m_value.array != nullptr); - - auto insert_pos = std::distance(m_value.array->begin(), pos.m_it.array_iterator); - m_value.array->insert(pos.m_it.array_iterator, std::forward(args)...); - result.m_it.array_iterator = m_value.array->begin() + insert_pos; - - // This could have been written as: - // result.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator, cnt, val); - // but the return value of insert is missing in GCC 4.8, so it is written this way instead. - - return result; - } - - /*! - @brief inserts element - - Inserts element @a val before iterator @a pos. - - @param[in] pos iterator before which the content will be inserted; may be - the end() iterator - @param[in] val element to insert - @return iterator pointing to the inserted @a val. - - @throw type_error.309 if called on JSON values other than arrays; - example: `"cannot use insert() with string"` - @throw invalid_iterator.202 if @a pos is not an iterator of *this; - example: `"iterator does not fit current value"` - - @complexity Constant plus linear in the distance between @a pos and end of - the container. - - @liveexample{The example shows how `insert()` is used.,insert} - - @since version 1.0.0 - */ - iterator insert(const_iterator pos, const basic_json& val) - { - // insert only works for arrays - if (JSON_HEDLEY_LIKELY(is_array())) - { - // check if iterator pos fits to this JSON value - if (JSON_HEDLEY_UNLIKELY(pos.m_object != this)) - { - JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); - } - - // insert to array and return iterator - return insert_iterator(pos, val); - } - - JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()))); - } - - /*! - @brief inserts element - @copydoc insert(const_iterator, const basic_json&) - */ - iterator insert(const_iterator pos, basic_json&& val) - { - return insert(pos, val); - } - - /*! - @brief inserts elements - - Inserts @a cnt copies of @a val before iterator @a pos. - - @param[in] pos iterator before which the content will be inserted; may be - the end() iterator - @param[in] cnt number of copies of @a val to insert - @param[in] val element to insert - @return iterator pointing to the first element inserted, or @a pos if - `cnt==0` - - @throw type_error.309 if called on JSON values other than arrays; example: - `"cannot use insert() with string"` - @throw invalid_iterator.202 if @a pos is not an iterator of *this; - example: `"iterator does not fit current value"` - - @complexity Linear in @a cnt plus linear in the distance between @a pos - and end of the container. - - @liveexample{The example shows how `insert()` is used.,insert__count} - - @since version 1.0.0 - */ - iterator insert(const_iterator pos, size_type cnt, const basic_json& val) - { - // insert only works for arrays - if (JSON_HEDLEY_LIKELY(is_array())) - { - // check if iterator pos fits to this JSON value - if (JSON_HEDLEY_UNLIKELY(pos.m_object != this)) - { - JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); - } - - // insert to array and return iterator - return insert_iterator(pos, cnt, val); - } - - JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()))); - } - - /*! - @brief inserts elements - - Inserts elements from range `[first, last)` before iterator @a pos. - - @param[in] pos iterator before which the content will be inserted; may be - the end() iterator - @param[in] first begin of the range of elements to insert - @param[in] last end of the range of elements to insert - - @throw type_error.309 if called on JSON values other than arrays; example: - `"cannot use insert() with string"` - @throw invalid_iterator.202 if @a pos is not an iterator of *this; - example: `"iterator does not fit current value"` - @throw invalid_iterator.210 if @a first and @a last do not belong to the - same JSON value; example: `"iterators do not fit"` - @throw invalid_iterator.211 if @a first or @a last are iterators into - container for which insert is called; example: `"passed iterators may not - belong to container"` - - @return iterator pointing to the first element inserted, or @a pos if - `first==last` - - @complexity Linear in `std::distance(first, last)` plus linear in the - distance between @a pos and end of the container. - - @liveexample{The example shows how `insert()` is used.,insert__range} - - @since version 1.0.0 - */ - iterator insert(const_iterator pos, const_iterator first, const_iterator last) - { - // insert only works for arrays - if (JSON_HEDLEY_UNLIKELY(!is_array())) - { - JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()))); - } - - // check if iterator pos fits to this JSON value - if (JSON_HEDLEY_UNLIKELY(pos.m_object != this)) - { - JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); - } - - // check if range iterators belong to the same JSON object - if (JSON_HEDLEY_UNLIKELY(first.m_object != last.m_object)) - { - JSON_THROW(invalid_iterator::create(210, "iterators do not fit")); - } - - if (JSON_HEDLEY_UNLIKELY(first.m_object == this)) - { - JSON_THROW(invalid_iterator::create(211, "passed iterators may not belong to container")); - } - - // insert to array and return iterator - return insert_iterator(pos, first.m_it.array_iterator, last.m_it.array_iterator); - } - - /*! - @brief inserts elements - - Inserts elements from initializer list @a ilist before iterator @a pos. - - @param[in] pos iterator before which the content will be inserted; may be - the end() iterator - @param[in] ilist initializer list to insert the values from - - @throw type_error.309 if called on JSON values other than arrays; example: - `"cannot use insert() with string"` - @throw invalid_iterator.202 if @a pos is not an iterator of *this; - example: `"iterator does not fit current value"` - - @return iterator pointing to the first element inserted, or @a pos if - `ilist` is empty - - @complexity Linear in `ilist.size()` plus linear in the distance between - @a pos and end of the container. - - @liveexample{The example shows how `insert()` is used.,insert__ilist} - - @since version 1.0.0 - */ - iterator insert(const_iterator pos, initializer_list_t ilist) - { - // insert only works for arrays - if (JSON_HEDLEY_UNLIKELY(!is_array())) - { - JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()))); - } - - // check if iterator pos fits to this JSON value - if (JSON_HEDLEY_UNLIKELY(pos.m_object != this)) - { - JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); - } - - // insert to array and return iterator - return insert_iterator(pos, ilist.begin(), ilist.end()); - } - - /*! - @brief inserts elements - - Inserts elements from range `[first, last)`. - - @param[in] first begin of the range of elements to insert - @param[in] last end of the range of elements to insert - - @throw type_error.309 if called on JSON values other than objects; example: - `"cannot use insert() with string"` - @throw invalid_iterator.202 if iterator @a first or @a last does does not - point to an object; example: `"iterators first and last must point to - objects"` - @throw invalid_iterator.210 if @a first and @a last do not belong to the - same JSON value; example: `"iterators do not fit"` - - @complexity Logarithmic: `O(N*log(size() + N))`, where `N` is the number - of elements to insert. - - @liveexample{The example shows how `insert()` is used.,insert__range_object} - - @since version 3.0.0 - */ - void insert(const_iterator first, const_iterator last) - { - // insert only works for objects - if (JSON_HEDLEY_UNLIKELY(!is_object())) - { - JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()))); - } - - // check if range iterators belong to the same JSON object - if (JSON_HEDLEY_UNLIKELY(first.m_object != last.m_object)) - { - JSON_THROW(invalid_iterator::create(210, "iterators do not fit")); - } - - // passed iterators must belong to objects - if (JSON_HEDLEY_UNLIKELY(!first.m_object->is_object())) - { - JSON_THROW(invalid_iterator::create(202, "iterators first and last must point to objects")); - } - - m_value.object->insert(first.m_it.object_iterator, last.m_it.object_iterator); - } - - /*! - @brief updates a JSON object from another object, overwriting existing keys - - Inserts all values from JSON object @a j and overwrites existing keys. - - @param[in] j JSON object to read values from - - @throw type_error.312 if called on JSON values other than objects; example: - `"cannot use update() with string"` - - @complexity O(N*log(size() + N)), where N is the number of elements to - insert. - - @liveexample{The example shows how `update()` is used.,update} - - @sa https://docs.python.org/3.6/library/stdtypes.html#dict.update - - @since version 3.0.0 - */ - void update(const_reference j) - { - // implicitly convert null value to an empty object - if (is_null()) - { - m_type = value_t::object; - m_value.object = create(); - assert_invariant(); - } - - if (JSON_HEDLEY_UNLIKELY(!is_object())) - { - JSON_THROW(type_error::create(312, "cannot use update() with " + std::string(type_name()))); - } - if (JSON_HEDLEY_UNLIKELY(!j.is_object())) - { - JSON_THROW(type_error::create(312, "cannot use update() with " + std::string(j.type_name()))); - } - - for (auto it = j.cbegin(); it != j.cend(); ++it) - { - m_value.object->operator[](it.key()) = it.value(); - } - } - - /*! - @brief updates a JSON object from another object, overwriting existing keys - - Inserts all values from from range `[first, last)` and overwrites existing - keys. - - @param[in] first begin of the range of elements to insert - @param[in] last end of the range of elements to insert - - @throw type_error.312 if called on JSON values other than objects; example: - `"cannot use update() with string"` - @throw invalid_iterator.202 if iterator @a first or @a last does does not - point to an object; example: `"iterators first and last must point to - objects"` - @throw invalid_iterator.210 if @a first and @a last do not belong to the - same JSON value; example: `"iterators do not fit"` - - @complexity O(N*log(size() + N)), where N is the number of elements to - insert. - - @liveexample{The example shows how `update()` is used__range.,update} - - @sa https://docs.python.org/3.6/library/stdtypes.html#dict.update - - @since version 3.0.0 - */ - void update(const_iterator first, const_iterator last) - { - // implicitly convert null value to an empty object - if (is_null()) - { - m_type = value_t::object; - m_value.object = create(); - assert_invariant(); - } - - if (JSON_HEDLEY_UNLIKELY(!is_object())) - { - JSON_THROW(type_error::create(312, "cannot use update() with " + std::string(type_name()))); - } - - // check if range iterators belong to the same JSON object - if (JSON_HEDLEY_UNLIKELY(first.m_object != last.m_object)) - { - JSON_THROW(invalid_iterator::create(210, "iterators do not fit")); - } - - // passed iterators must belong to objects - if (JSON_HEDLEY_UNLIKELY(!first.m_object->is_object() - || !last.m_object->is_object())) - { - JSON_THROW(invalid_iterator::create(202, "iterators first and last must point to objects")); - } - - for (auto it = first; it != last; ++it) - { - m_value.object->operator[](it.key()) = it.value(); - } - } - - /*! - @brief exchanges the values - - Exchanges the contents of the JSON value with those of @a other. Does not - invoke any move, copy, or swap operations on individual elements. All - iterators and references remain valid. The past-the-end iterator is - invalidated. - - @param[in,out] other JSON value to exchange the contents with - - @complexity Constant. - - @liveexample{The example below shows how JSON values can be swapped with - `swap()`.,swap__reference} - - @since version 1.0.0 - */ - void swap(reference other) noexcept ( - std::is_nothrow_move_constructible::value&& - std::is_nothrow_move_assignable::value&& - std::is_nothrow_move_constructible::value&& - std::is_nothrow_move_assignable::value - ) - { - std::swap(m_type, other.m_type); - std::swap(m_value, other.m_value); - assert_invariant(); - } - - /*! - @brief exchanges the values - - Exchanges the contents of the JSON value from @a left with those of @a right. Does not - invoke any move, copy, or swap operations on individual elements. All - iterators and references remain valid. The past-the-end iterator is - invalidated. implemented as a friend function callable via ADL. - - @param[in,out] left JSON value to exchange the contents with - @param[in,out] right JSON value to exchange the contents with - - @complexity Constant. - - @liveexample{The example below shows how JSON values can be swapped with - `swap()`.,swap__reference} - - @since version 1.0.0 - */ - friend void swap(reference left, reference right) noexcept ( - std::is_nothrow_move_constructible::value&& - std::is_nothrow_move_assignable::value&& - std::is_nothrow_move_constructible::value&& - std::is_nothrow_move_assignable::value - ) - { - left.swap(right); - } - - /*! - @brief exchanges the values - - Exchanges the contents of a JSON array with those of @a other. Does not - invoke any move, copy, or swap operations on individual elements. All - iterators and references remain valid. The past-the-end iterator is - invalidated. - - @param[in,out] other array to exchange the contents with - - @throw type_error.310 when JSON value is not an array; example: `"cannot - use swap() with string"` - - @complexity Constant. - - @liveexample{The example below shows how arrays can be swapped with - `swap()`.,swap__array_t} - - @since version 1.0.0 - */ - void swap(array_t& other) - { - // swap only works for arrays - if (JSON_HEDLEY_LIKELY(is_array())) - { - std::swap(*(m_value.array), other); - } - else - { - JSON_THROW(type_error::create(310, "cannot use swap() with " + std::string(type_name()))); - } - } - - /*! - @brief exchanges the values - - Exchanges the contents of a JSON object with those of @a other. Does not - invoke any move, copy, or swap operations on individual elements. All - iterators and references remain valid. The past-the-end iterator is - invalidated. - - @param[in,out] other object to exchange the contents with - - @throw type_error.310 when JSON value is not an object; example: - `"cannot use swap() with string"` - - @complexity Constant. - - @liveexample{The example below shows how objects can be swapped with - `swap()`.,swap__object_t} - - @since version 1.0.0 - */ - void swap(object_t& other) - { - // swap only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - std::swap(*(m_value.object), other); - } - else - { - JSON_THROW(type_error::create(310, "cannot use swap() with " + std::string(type_name()))); - } - } - - /*! - @brief exchanges the values - - Exchanges the contents of a JSON string with those of @a other. Does not - invoke any move, copy, or swap operations on individual elements. All - iterators and references remain valid. The past-the-end iterator is - invalidated. - - @param[in,out] other string to exchange the contents with - - @throw type_error.310 when JSON value is not a string; example: `"cannot - use swap() with boolean"` - - @complexity Constant. - - @liveexample{The example below shows how strings can be swapped with - `swap()`.,swap__string_t} - - @since version 1.0.0 - */ - void swap(string_t& other) - { - // swap only works for strings - if (JSON_HEDLEY_LIKELY(is_string())) - { - std::swap(*(m_value.string), other); - } - else - { - JSON_THROW(type_error::create(310, "cannot use swap() with " + std::string(type_name()))); - } - } - - /*! - @brief exchanges the values - - Exchanges the contents of a JSON string with those of @a other. Does not - invoke any move, copy, or swap operations on individual elements. All - iterators and references remain valid. The past-the-end iterator is - invalidated. - - @param[in,out] other binary to exchange the contents with - - @throw type_error.310 when JSON value is not a string; example: `"cannot - use swap() with boolean"` - - @complexity Constant. - - @liveexample{The example below shows how strings can be swapped with - `swap()`.,swap__binary_t} - - @since version 3.8.0 - */ - void swap(binary_t& other) - { - // swap only works for strings - if (JSON_HEDLEY_LIKELY(is_binary())) - { - std::swap(*(m_value.binary), other); - } - else - { - JSON_THROW(type_error::create(310, "cannot use swap() with " + std::string(type_name()))); - } - } - - /// @copydoc swap(binary_t) - void swap(typename binary_t::container_type& other) - { - // swap only works for strings - if (JSON_HEDLEY_LIKELY(is_binary())) - { - std::swap(*(m_value.binary), other); - } - else - { - JSON_THROW(type_error::create(310, "cannot use swap() with " + std::string(type_name()))); - } - } - - /// @} - - public: - ////////////////////////////////////////// - // lexicographical comparison operators // - ////////////////////////////////////////// - - /// @name lexicographical comparison operators - /// @{ - - /*! - @brief comparison: equal - - Compares two JSON values for equality according to the following rules: - - Two JSON values are equal if (1) they are from the same type and (2) - their stored values are the same according to their respective - `operator==`. - - Integer and floating-point numbers are automatically converted before - comparison. Note that two NaN values are always treated as unequal. - - Two JSON null values are equal. - - @note Floating-point inside JSON values numbers are compared with - `json::number_float_t::operator==` which is `double::operator==` by - default. To compare floating-point while respecting an epsilon, an alternative - [comparison function](https://github.com/mariokonrad/marnav/blob/master/include/marnav/math/floatingpoint.hpp#L34-#L39) - could be used, for instance - @code {.cpp} - template::value, T>::type> - inline bool is_same(T a, T b, T epsilon = std::numeric_limits::epsilon()) noexcept - { - return std::abs(a - b) <= epsilon; - } - @endcode - Or you can self-defined operator equal function like this: - @code {.cpp} - bool my_equal(const_reference lhs, const_reference rhs) { - const auto lhs_type lhs.type(); - const auto rhs_type rhs.type(); - if (lhs_type == rhs_type) { - switch(lhs_type) - // self_defined case - case value_t::number_float: - return std::abs(lhs - rhs) <= std::numeric_limits::epsilon(); - // other cases remain the same with the original - ... - } - ... - } - @endcode - - @note NaN values never compare equal to themselves or to other NaN values. - - @param[in] lhs first JSON value to consider - @param[in] rhs second JSON value to consider - @return whether the values @a lhs and @a rhs are equal - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @complexity Linear. - - @liveexample{The example demonstrates comparing several JSON - types.,operator__equal} - - @since version 1.0.0 - */ - friend bool operator==(const_reference lhs, const_reference rhs) noexcept - { - const auto lhs_type = lhs.type(); - const auto rhs_type = rhs.type(); - - if (lhs_type == rhs_type) - { - switch (lhs_type) - { - case value_t::array: - return *lhs.m_value.array == *rhs.m_value.array; - - case value_t::object: - return *lhs.m_value.object == *rhs.m_value.object; - - case value_t::null: - return true; - - case value_t::string: - return *lhs.m_value.string == *rhs.m_value.string; - - case value_t::boolean: - return lhs.m_value.boolean == rhs.m_value.boolean; - - case value_t::number_integer: - return lhs.m_value.number_integer == rhs.m_value.number_integer; - - case value_t::number_unsigned: - return lhs.m_value.number_unsigned == rhs.m_value.number_unsigned; - - case value_t::number_float: - return lhs.m_value.number_float == rhs.m_value.number_float; - - case value_t::binary: - return *lhs.m_value.binary == *rhs.m_value.binary; - - default: - return false; - } - } - else if (lhs_type == value_t::number_integer && rhs_type == value_t::number_float) - { - return static_cast(lhs.m_value.number_integer) == rhs.m_value.number_float; - } - else if (lhs_type == value_t::number_float && rhs_type == value_t::number_integer) - { - return lhs.m_value.number_float == static_cast(rhs.m_value.number_integer); - } - else if (lhs_type == value_t::number_unsigned && rhs_type == value_t::number_float) - { - return static_cast(lhs.m_value.number_unsigned) == rhs.m_value.number_float; - } - else if (lhs_type == value_t::number_float && rhs_type == value_t::number_unsigned) - { - return lhs.m_value.number_float == static_cast(rhs.m_value.number_unsigned); - } - else if (lhs_type == value_t::number_unsigned && rhs_type == value_t::number_integer) - { - return static_cast(lhs.m_value.number_unsigned) == rhs.m_value.number_integer; - } - else if (lhs_type == value_t::number_integer && rhs_type == value_t::number_unsigned) - { - return lhs.m_value.number_integer == static_cast(rhs.m_value.number_unsigned); - } - - return false; - } - - /*! - @brief comparison: equal - @copydoc operator==(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator==(const_reference lhs, const ScalarType rhs) noexcept - { - return lhs == basic_json(rhs); - } - - /*! - @brief comparison: equal - @copydoc operator==(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator==(const ScalarType lhs, const_reference rhs) noexcept - { - return basic_json(lhs) == rhs; - } - - /*! - @brief comparison: not equal - - Compares two JSON values for inequality by calculating `not (lhs == rhs)`. - - @param[in] lhs first JSON value to consider - @param[in] rhs second JSON value to consider - @return whether the values @a lhs and @a rhs are not equal - - @complexity Linear. - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @liveexample{The example demonstrates comparing several JSON - types.,operator__notequal} - - @since version 1.0.0 - */ - friend bool operator!=(const_reference lhs, const_reference rhs) noexcept - { - return !(lhs == rhs); - } - - /*! - @brief comparison: not equal - @copydoc operator!=(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator!=(const_reference lhs, const ScalarType rhs) noexcept - { - return lhs != basic_json(rhs); - } - - /*! - @brief comparison: not equal - @copydoc operator!=(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator!=(const ScalarType lhs, const_reference rhs) noexcept - { - return basic_json(lhs) != rhs; - } - - /*! - @brief comparison: less than - - Compares whether one JSON value @a lhs is less than another JSON value @a - rhs according to the following rules: - - If @a lhs and @a rhs have the same type, the values are compared using - the default `<` operator. - - Integer and floating-point numbers are automatically converted before - comparison - - In case @a lhs and @a rhs have different types, the values are ignored - and the order of the types is considered, see - @ref operator<(const value_t, const value_t). - - @param[in] lhs first JSON value to consider - @param[in] rhs second JSON value to consider - @return whether @a lhs is less than @a rhs - - @complexity Linear. - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @liveexample{The example demonstrates comparing several JSON - types.,operator__less} - - @since version 1.0.0 - */ - friend bool operator<(const_reference lhs, const_reference rhs) noexcept - { - const auto lhs_type = lhs.type(); - const auto rhs_type = rhs.type(); - - if (lhs_type == rhs_type) - { - switch (lhs_type) - { - case value_t::array: - // note parentheses are necessary, see - // https://github.com/nlohmann/json/issues/1530 - return (*lhs.m_value.array) < (*rhs.m_value.array); - - case value_t::object: - return (*lhs.m_value.object) < (*rhs.m_value.object); - - case value_t::null: - return false; - - case value_t::string: - return (*lhs.m_value.string) < (*rhs.m_value.string); - - case value_t::boolean: - return (lhs.m_value.boolean) < (rhs.m_value.boolean); - - case value_t::number_integer: - return (lhs.m_value.number_integer) < (rhs.m_value.number_integer); - - case value_t::number_unsigned: - return (lhs.m_value.number_unsigned) < (rhs.m_value.number_unsigned); - - case value_t::number_float: - return (lhs.m_value.number_float) < (rhs.m_value.number_float); - - case value_t::binary: - return (*lhs.m_value.binary) < (*rhs.m_value.binary); - - default: - return false; - } - } - else if (lhs_type == value_t::number_integer && rhs_type == value_t::number_float) - { - return static_cast(lhs.m_value.number_integer) < rhs.m_value.number_float; - } - else if (lhs_type == value_t::number_float && rhs_type == value_t::number_integer) - { - return lhs.m_value.number_float < static_cast(rhs.m_value.number_integer); - } - else if (lhs_type == value_t::number_unsigned && rhs_type == value_t::number_float) - { - return static_cast(lhs.m_value.number_unsigned) < rhs.m_value.number_float; - } - else if (lhs_type == value_t::number_float && rhs_type == value_t::number_unsigned) - { - return lhs.m_value.number_float < static_cast(rhs.m_value.number_unsigned); - } - else if (lhs_type == value_t::number_integer && rhs_type == value_t::number_unsigned) - { - return lhs.m_value.number_integer < static_cast(rhs.m_value.number_unsigned); - } - else if (lhs_type == value_t::number_unsigned && rhs_type == value_t::number_integer) - { - return static_cast(lhs.m_value.number_unsigned) < rhs.m_value.number_integer; - } - - // We only reach this line if we cannot compare values. In that case, - // we compare types. Note we have to call the operator explicitly, - // because MSVC has problems otherwise. - return operator<(lhs_type, rhs_type); - } - - /*! - @brief comparison: less than - @copydoc operator<(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator<(const_reference lhs, const ScalarType rhs) noexcept - { - return lhs < basic_json(rhs); - } - - /*! - @brief comparison: less than - @copydoc operator<(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator<(const ScalarType lhs, const_reference rhs) noexcept - { - return basic_json(lhs) < rhs; - } - - /*! - @brief comparison: less than or equal - - Compares whether one JSON value @a lhs is less than or equal to another - JSON value by calculating `not (rhs < lhs)`. - - @param[in] lhs first JSON value to consider - @param[in] rhs second JSON value to consider - @return whether @a lhs is less than or equal to @a rhs - - @complexity Linear. - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @liveexample{The example demonstrates comparing several JSON - types.,operator__greater} - - @since version 1.0.0 - */ - friend bool operator<=(const_reference lhs, const_reference rhs) noexcept - { - return !(rhs < lhs); - } - - /*! - @brief comparison: less than or equal - @copydoc operator<=(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator<=(const_reference lhs, const ScalarType rhs) noexcept - { - return lhs <= basic_json(rhs); - } - - /*! - @brief comparison: less than or equal - @copydoc operator<=(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator<=(const ScalarType lhs, const_reference rhs) noexcept - { - return basic_json(lhs) <= rhs; - } - - /*! - @brief comparison: greater than - - Compares whether one JSON value @a lhs is greater than another - JSON value by calculating `not (lhs <= rhs)`. - - @param[in] lhs first JSON value to consider - @param[in] rhs second JSON value to consider - @return whether @a lhs is greater than to @a rhs - - @complexity Linear. - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @liveexample{The example demonstrates comparing several JSON - types.,operator__lessequal} - - @since version 1.0.0 - */ - friend bool operator>(const_reference lhs, const_reference rhs) noexcept - { - return !(lhs <= rhs); - } - - /*! - @brief comparison: greater than - @copydoc operator>(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator>(const_reference lhs, const ScalarType rhs) noexcept - { - return lhs > basic_json(rhs); - } - - /*! - @brief comparison: greater than - @copydoc operator>(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator>(const ScalarType lhs, const_reference rhs) noexcept - { - return basic_json(lhs) > rhs; - } - - /*! - @brief comparison: greater than or equal - - Compares whether one JSON value @a lhs is greater than or equal to another - JSON value by calculating `not (lhs < rhs)`. - - @param[in] lhs first JSON value to consider - @param[in] rhs second JSON value to consider - @return whether @a lhs is greater than or equal to @a rhs - - @complexity Linear. - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @liveexample{The example demonstrates comparing several JSON - types.,operator__greaterequal} - - @since version 1.0.0 - */ - friend bool operator>=(const_reference lhs, const_reference rhs) noexcept - { - return !(lhs < rhs); - } - - /*! - @brief comparison: greater than or equal - @copydoc operator>=(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator>=(const_reference lhs, const ScalarType rhs) noexcept - { - return lhs >= basic_json(rhs); - } - - /*! - @brief comparison: greater than or equal - @copydoc operator>=(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator>=(const ScalarType lhs, const_reference rhs) noexcept - { - return basic_json(lhs) >= rhs; - } - - /// @} - - /////////////////// - // serialization // - /////////////////// - - /// @name serialization - /// @{ - - /*! - @brief serialize to stream - - Serialize the given JSON value @a j to the output stream @a o. The JSON - value will be serialized using the @ref dump member function. - - - The indentation of the output can be controlled with the member variable - `width` of the output stream @a o. For instance, using the manipulator - `std::setw(4)` on @a o sets the indentation level to `4` and the - serialization result is the same as calling `dump(4)`. - - - The indentation character can be controlled with the member variable - `fill` of the output stream @a o. For instance, the manipulator - `std::setfill('\\t')` sets indentation to use a tab character rather than - the default space character. - - @param[in,out] o stream to serialize to - @param[in] j JSON value to serialize - - @return the stream @a o - - @throw type_error.316 if a string stored inside the JSON value is not - UTF-8 encoded - - @complexity Linear. - - @liveexample{The example below shows the serialization with different - parameters to `width` to adjust the indentation level.,operator_serialize} - - @since version 1.0.0; indentation character added in version 3.0.0 - */ - friend std::ostream& operator<<(std::ostream& o, const basic_json& j) - { - // read width member and use it as indentation parameter if nonzero - const bool pretty_print = o.width() > 0; - const auto indentation = pretty_print ? o.width() : 0; - - // reset width to 0 for subsequent calls to this stream - o.width(0); - - // do the actual serialization - serializer s(detail::output_adapter(o), o.fill()); - s.dump(j, pretty_print, false, static_cast(indentation)); - return o; - } - - /*! - @brief serialize to stream - @deprecated This stream operator is deprecated and will be removed in - future 4.0.0 of the library. Please use - @ref operator<<(std::ostream&, const basic_json&) - instead; that is, replace calls like `j >> o;` with `o << j;`. - @since version 1.0.0; deprecated since version 3.0.0 - */ - JSON_HEDLEY_DEPRECATED_FOR(3.0.0, operator<<(std::ostream&, const basic_json&)) - friend std::ostream& operator>>(const basic_json& j, std::ostream& o) - { - return o << j; - } - - /// @} - - - ///////////////////// - // deserialization // - ///////////////////// - - /// @name deserialization - /// @{ - - /*! - @brief deserialize from a compatible input - - @tparam InputType A compatible input, for instance - - an std::istream object - - a FILE pointer - - a C-style array of characters - - a pointer to a null-terminated string of single byte characters - - an object obj for which begin(obj) and end(obj) produces a valid pair of - iterators. - - @param[in] i input to read from - @param[in] cb a parser callback function of type @ref parser_callback_t - which is used to control the deserialization by filtering unwanted values - (optional) - @param[in] allow_exceptions whether to throw exceptions in case of a - parse error (optional, true by default) - @param[in] ignore_comments whether comments should be ignored and treated - like whitespace (true) or yield a parse error (true); (optional, false by - default) - - @return deserialized JSON value; in case of a parse error and - @a allow_exceptions set to `false`, the return value will be - value_t::discarded. - - @throw parse_error.101 if a parse error occurs; example: `""unexpected end - of input; expected string literal""` - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - - @complexity Linear in the length of the input. The parser is a predictive - LL(1) parser. The complexity can be higher if the parser callback function - @a cb or reading from the input @a i has a super-linear complexity. - - @note A UTF-8 byte order mark is silently ignored. - - @liveexample{The example below demonstrates the `parse()` function reading - from an array.,parse__array__parser_callback_t} - - @liveexample{The example below demonstrates the `parse()` function with - and without callback function.,parse__string__parser_callback_t} - - @liveexample{The example below demonstrates the `parse()` function with - and without callback function.,parse__istream__parser_callback_t} - - @liveexample{The example below demonstrates the `parse()` function reading - from a contiguous container.,parse__contiguouscontainer__parser_callback_t} - - @since version 2.0.3 (contiguous containers); version 3.9.0 allowed to - ignore comments. - */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json parse(InputType&& i, - const parser_callback_t cb = nullptr, - const bool allow_exceptions = true, - const bool ignore_comments = false) - { - basic_json result; - parser(detail::input_adapter(std::forward(i)), cb, allow_exceptions, ignore_comments).parse(true, result); - return result; - } - - /*! - @brief deserialize from a pair of character iterators - - The value_type of the iterator must be a integral type with size of 1, 2 or - 4 bytes, which will be interpreted respectively as UTF-8, UTF-16 and UTF-32. - - @param[in] first iterator to start of character range - @param[in] last iterator to end of character range - @param[in] cb a parser callback function of type @ref parser_callback_t - which is used to control the deserialization by filtering unwanted values - (optional) - @param[in] allow_exceptions whether to throw exceptions in case of a - parse error (optional, true by default) - @param[in] ignore_comments whether comments should be ignored and treated - like whitespace (true) or yield a parse error (true); (optional, false by - default) - - @return deserialized JSON value; in case of a parse error and - @a allow_exceptions set to `false`, the return value will be - value_t::discarded. - - @throw parse_error.101 if a parse error occurs; example: `""unexpected end - of input; expected string literal""` - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json parse(IteratorType first, - IteratorType last, - const parser_callback_t cb = nullptr, - const bool allow_exceptions = true, - const bool ignore_comments = false) - { - basic_json result; - parser(detail::input_adapter(std::move(first), std::move(last)), cb, allow_exceptions, ignore_comments).parse(true, result); - return result; - } - - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, parse(ptr, ptr + len)) - static basic_json parse(detail::span_input_adapter&& i, - const parser_callback_t cb = nullptr, - const bool allow_exceptions = true, - const bool ignore_comments = false) - { - basic_json result; - parser(i.get(), cb, allow_exceptions, ignore_comments).parse(true, result); - return result; - } - - /*! - @brief check if the input is valid JSON - - Unlike the @ref parse(InputType&&, const parser_callback_t,const bool) - function, this function neither throws an exception in case of invalid JSON - input (i.e., a parse error) nor creates diagnostic information. - - @tparam InputType A compatible input, for instance - - an std::istream object - - a FILE pointer - - a C-style array of characters - - a pointer to a null-terminated string of single byte characters - - an object obj for which begin(obj) and end(obj) produces a valid pair of - iterators. - - @param[in] i input to read from - @param[in] ignore_comments whether comments should be ignored and treated - like whitespace (true) or yield a parse error (true); (optional, false by - default) - - @return Whether the input read from @a i is valid JSON. - - @complexity Linear in the length of the input. The parser is a predictive - LL(1) parser. - - @note A UTF-8 byte order mark is silently ignored. - - @liveexample{The example below demonstrates the `accept()` function reading - from a string.,accept__string} - */ - template - static bool accept(InputType&& i, - const bool ignore_comments = false) - { - return parser(detail::input_adapter(std::forward(i)), nullptr, false, ignore_comments).accept(true); - } - - template - static bool accept(IteratorType first, IteratorType last, - const bool ignore_comments = false) - { - return parser(detail::input_adapter(std::move(first), std::move(last)), nullptr, false, ignore_comments).accept(true); - } - - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, accept(ptr, ptr + len)) - static bool accept(detail::span_input_adapter&& i, - const bool ignore_comments = false) - { - return parser(i.get(), nullptr, false, ignore_comments).accept(true); - } - - /*! - @brief generate SAX events - - The SAX event lister must follow the interface of @ref json_sax. - - This function reads from a compatible input. Examples are: - - an std::istream object - - a FILE pointer - - a C-style array of characters - - a pointer to a null-terminated string of single byte characters - - an object obj for which begin(obj) and end(obj) produces a valid pair of - iterators. - - @param[in] i input to read from - @param[in,out] sax SAX event listener - @param[in] format the format to parse (JSON, CBOR, MessagePack, or UBJSON) - @param[in] strict whether the input has to be consumed completely - @param[in] ignore_comments whether comments should be ignored and treated - like whitespace (true) or yield a parse error (true); (optional, false by - default); only applies to the JSON file format. - - @return return value of the last processed SAX event - - @throw parse_error.101 if a parse error occurs; example: `""unexpected end - of input; expected string literal""` - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - - @complexity Linear in the length of the input. The parser is a predictive - LL(1) parser. The complexity can be higher if the SAX consumer @a sax has - a super-linear complexity. - - @note A UTF-8 byte order mark is silently ignored. - - @liveexample{The example below demonstrates the `sax_parse()` function - reading from string and processing the events with a user-defined SAX - event consumer.,sax_parse} - - @since version 3.2.0 - */ - template - JSON_HEDLEY_NON_NULL(2) - static bool sax_parse(InputType&& i, SAX* sax, - input_format_t format = input_format_t::json, - const bool strict = true, - const bool ignore_comments = false) - { - auto ia = detail::input_adapter(std::forward(i)); - return format == input_format_t::json - ? parser(std::move(ia), nullptr, true, ignore_comments).sax_parse(sax, strict) - : detail::binary_reader(std::move(ia)).sax_parse(format, sax, strict); - } - - template - JSON_HEDLEY_NON_NULL(3) - static bool sax_parse(IteratorType first, IteratorType last, SAX* sax, - input_format_t format = input_format_t::json, - const bool strict = true, - const bool ignore_comments = false) - { - auto ia = detail::input_adapter(std::move(first), std::move(last)); - return format == input_format_t::json - ? parser(std::move(ia), nullptr, true, ignore_comments).sax_parse(sax, strict) - : detail::binary_reader(std::move(ia)).sax_parse(format, sax, strict); - } - - template - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, sax_parse(ptr, ptr + len, ...)) - JSON_HEDLEY_NON_NULL(2) - static bool sax_parse(detail::span_input_adapter&& i, SAX* sax, - input_format_t format = input_format_t::json, - const bool strict = true, - const bool ignore_comments = false) - { - auto ia = i.get(); - return format == input_format_t::json - ? parser(std::move(ia), nullptr, true, ignore_comments).sax_parse(sax, strict) - : detail::binary_reader(std::move(ia)).sax_parse(format, sax, strict); - } - - /*! - @brief deserialize from stream - @deprecated This stream operator is deprecated and will be removed in - version 4.0.0 of the library. Please use - @ref operator>>(std::istream&, basic_json&) - instead; that is, replace calls like `j << i;` with `i >> j;`. - @since version 1.0.0; deprecated since version 3.0.0 - */ - JSON_HEDLEY_DEPRECATED_FOR(3.0.0, operator>>(std::istream&, basic_json&)) - friend std::istream& operator<<(basic_json& j, std::istream& i) - { - return operator>>(i, j); - } - - /*! - @brief deserialize from stream - - Deserializes an input stream to a JSON value. - - @param[in,out] i input stream to read a serialized JSON value from - @param[in,out] j JSON value to write the deserialized input to - - @throw parse_error.101 in case of an unexpected token - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - - @complexity Linear in the length of the input. The parser is a predictive - LL(1) parser. - - @note A UTF-8 byte order mark is silently ignored. - - @liveexample{The example below shows how a JSON value is constructed by - reading a serialization from a stream.,operator_deserialize} - - @sa parse(std::istream&, const parser_callback_t) for a variant with a - parser callback function to filter values while parsing - - @since version 1.0.0 - */ - friend std::istream& operator>>(std::istream& i, basic_json& j) - { - parser(detail::input_adapter(i)).parse(false, j); - return i; - } - - /// @} - - /////////////////////////// - // convenience functions // - /////////////////////////// - - /*! - @brief return the type as string - - Returns the type name as string to be used in error messages - usually to - indicate that a function was called on a wrong JSON type. - - @return a string representation of a the @a m_type member: - Value type | return value - ----------- | ------------- - null | `"null"` - boolean | `"boolean"` - string | `"string"` - number | `"number"` (for all number types) - object | `"object"` - array | `"array"` - binary | `"binary"` - discarded | `"discarded"` - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @complexity Constant. - - @liveexample{The following code exemplifies `type_name()` for all JSON - types.,type_name} - - @sa @ref type() -- return the type of the JSON value - @sa @ref operator value_t() -- return the type of the JSON value (implicit) - - @since version 1.0.0, public since 2.1.0, `const char*` and `noexcept` - since 3.0.0 - */ - JSON_HEDLEY_RETURNS_NON_NULL - const char* type_name() const noexcept - { - { - switch (m_type) - { - case value_t::null: - return "null"; - case value_t::object: - return "object"; - case value_t::array: - return "array"; - case value_t::string: - return "string"; - case value_t::boolean: - return "boolean"; - case value_t::binary: - return "binary"; - case value_t::discarded: - return "discarded"; - default: - return "number"; - } - } - } - - - private: - ////////////////////// - // member variables // - ////////////////////// - - /// the type of the current element - value_t m_type = value_t::null; - - /// the value of the current element - json_value m_value = {}; - - ////////////////////////////////////////// - // binary serialization/deserialization // - ////////////////////////////////////////// - - /// @name binary serialization/deserialization support - /// @{ - - public: - /*! - @brief create a CBOR serialization of a given JSON value - - Serializes a given JSON value @a j to a byte vector using the CBOR (Concise - Binary Object Representation) serialization format. CBOR is a binary - serialization format which aims to be more compact than JSON itself, yet - more efficient to parse. - - The library uses the following mapping from JSON values types to - CBOR types according to the CBOR specification (RFC 7049): - - JSON value type | value/range | CBOR type | first byte - --------------- | ------------------------------------------ | ---------------------------------- | --------------- - null | `null` | Null | 0xF6 - boolean | `true` | True | 0xF5 - boolean | `false` | False | 0xF4 - number_integer | -9223372036854775808..-2147483649 | Negative integer (8 bytes follow) | 0x3B - number_integer | -2147483648..-32769 | Negative integer (4 bytes follow) | 0x3A - number_integer | -32768..-129 | Negative integer (2 bytes follow) | 0x39 - number_integer | -128..-25 | Negative integer (1 byte follow) | 0x38 - number_integer | -24..-1 | Negative integer | 0x20..0x37 - number_integer | 0..23 | Integer | 0x00..0x17 - number_integer | 24..255 | Unsigned integer (1 byte follow) | 0x18 - number_integer | 256..65535 | Unsigned integer (2 bytes follow) | 0x19 - number_integer | 65536..4294967295 | Unsigned integer (4 bytes follow) | 0x1A - number_integer | 4294967296..18446744073709551615 | Unsigned integer (8 bytes follow) | 0x1B - number_unsigned | 0..23 | Integer | 0x00..0x17 - number_unsigned | 24..255 | Unsigned integer (1 byte follow) | 0x18 - number_unsigned | 256..65535 | Unsigned integer (2 bytes follow) | 0x19 - number_unsigned | 65536..4294967295 | Unsigned integer (4 bytes follow) | 0x1A - number_unsigned | 4294967296..18446744073709551615 | Unsigned integer (8 bytes follow) | 0x1B - number_float | *any value representable by a float* | Single-Precision Float | 0xFA - number_float | *any value NOT representable by a float* | Double-Precision Float | 0xFB - string | *length*: 0..23 | UTF-8 string | 0x60..0x77 - string | *length*: 23..255 | UTF-8 string (1 byte follow) | 0x78 - string | *length*: 256..65535 | UTF-8 string (2 bytes follow) | 0x79 - string | *length*: 65536..4294967295 | UTF-8 string (4 bytes follow) | 0x7A - string | *length*: 4294967296..18446744073709551615 | UTF-8 string (8 bytes follow) | 0x7B - array | *size*: 0..23 | array | 0x80..0x97 - array | *size*: 23..255 | array (1 byte follow) | 0x98 - array | *size*: 256..65535 | array (2 bytes follow) | 0x99 - array | *size*: 65536..4294967295 | array (4 bytes follow) | 0x9A - array | *size*: 4294967296..18446744073709551615 | array (8 bytes follow) | 0x9B - object | *size*: 0..23 | map | 0xA0..0xB7 - object | *size*: 23..255 | map (1 byte follow) | 0xB8 - object | *size*: 256..65535 | map (2 bytes follow) | 0xB9 - object | *size*: 65536..4294967295 | map (4 bytes follow) | 0xBA - object | *size*: 4294967296..18446744073709551615 | map (8 bytes follow) | 0xBB - binary | *size*: 0..23 | byte string | 0x40..0x57 - binary | *size*: 23..255 | byte string (1 byte follow) | 0x58 - binary | *size*: 256..65535 | byte string (2 bytes follow) | 0x59 - binary | *size*: 65536..4294967295 | byte string (4 bytes follow) | 0x5A - binary | *size*: 4294967296..18446744073709551615 | byte string (8 bytes follow) | 0x5B - - @note The mapping is **complete** in the sense that any JSON value type - can be converted to a CBOR value. - - @note If NaN or Infinity are stored inside a JSON number, they are - serialized properly. This behavior differs from the @ref dump() - function which serializes NaN or Infinity to `null`. - - @note The following CBOR types are not used in the conversion: - - UTF-8 strings terminated by "break" (0x7F) - - arrays terminated by "break" (0x9F) - - maps terminated by "break" (0xBF) - - byte strings terminated by "break" (0x5F) - - date/time (0xC0..0xC1) - - bignum (0xC2..0xC3) - - decimal fraction (0xC4) - - bigfloat (0xC5) - - expected conversions (0xD5..0xD7) - - simple values (0xE0..0xF3, 0xF8) - - undefined (0xF7) - - half-precision floats (0xF9) - - break (0xFF) - - @param[in] j JSON value to serialize - @return CBOR serialization as byte vector - - @complexity Linear in the size of the JSON value @a j. - - @liveexample{The example shows the serialization of a JSON value to a byte - vector in CBOR format.,to_cbor} - - @sa http://cbor.io - @sa @ref from_cbor(detail::input_adapter&&, const bool, const bool, const cbor_tag_handler_t) for the - analogous deserialization - @sa @ref to_msgpack(const basic_json&) for the related MessagePack format - @sa @ref to_ubjson(const basic_json&, const bool, const bool) for the - related UBJSON format - - @since version 2.0.9; compact representation of floating-point numbers - since version 3.8.0 - */ - static std::vector to_cbor(const basic_json& j) - { - std::vector result; - to_cbor(j, result); - return result; - } - - static void to_cbor(const basic_json& j, detail::output_adapter o) - { - binary_writer(o).write_cbor(j); - } - - static void to_cbor(const basic_json& j, detail::output_adapter o) - { - binary_writer(o).write_cbor(j); - } - - /*! - @brief create a MessagePack serialization of a given JSON value - - Serializes a given JSON value @a j to a byte vector using the MessagePack - serialization format. MessagePack is a binary serialization format which - aims to be more compact than JSON itself, yet more efficient to parse. - - The library uses the following mapping from JSON values types to - MessagePack types according to the MessagePack specification: - - JSON value type | value/range | MessagePack type | first byte - --------------- | --------------------------------- | ---------------- | ---------- - null | `null` | nil | 0xC0 - boolean | `true` | true | 0xC3 - boolean | `false` | false | 0xC2 - number_integer | -9223372036854775808..-2147483649 | int64 | 0xD3 - number_integer | -2147483648..-32769 | int32 | 0xD2 - number_integer | -32768..-129 | int16 | 0xD1 - number_integer | -128..-33 | int8 | 0xD0 - number_integer | -32..-1 | negative fixint | 0xE0..0xFF - number_integer | 0..127 | positive fixint | 0x00..0x7F - number_integer | 128..255 | uint 8 | 0xCC - number_integer | 256..65535 | uint 16 | 0xCD - number_integer | 65536..4294967295 | uint 32 | 0xCE - number_integer | 4294967296..18446744073709551615 | uint 64 | 0xCF - number_unsigned | 0..127 | positive fixint | 0x00..0x7F - number_unsigned | 128..255 | uint 8 | 0xCC - number_unsigned | 256..65535 | uint 16 | 0xCD - number_unsigned | 65536..4294967295 | uint 32 | 0xCE - number_unsigned | 4294967296..18446744073709551615 | uint 64 | 0xCF - number_float | *any value representable by a float* | float 32 | 0xCA - number_float | *any value NOT representable by a float* | float 64 | 0xCB - string | *length*: 0..31 | fixstr | 0xA0..0xBF - string | *length*: 32..255 | str 8 | 0xD9 - string | *length*: 256..65535 | str 16 | 0xDA - string | *length*: 65536..4294967295 | str 32 | 0xDB - array | *size*: 0..15 | fixarray | 0x90..0x9F - array | *size*: 16..65535 | array 16 | 0xDC - array | *size*: 65536..4294967295 | array 32 | 0xDD - object | *size*: 0..15 | fix map | 0x80..0x8F - object | *size*: 16..65535 | map 16 | 0xDE - object | *size*: 65536..4294967295 | map 32 | 0xDF - binary | *size*: 0..255 | bin 8 | 0xC4 - binary | *size*: 256..65535 | bin 16 | 0xC5 - binary | *size*: 65536..4294967295 | bin 32 | 0xC6 - - @note The mapping is **complete** in the sense that any JSON value type - can be converted to a MessagePack value. - - @note The following values can **not** be converted to a MessagePack value: - - strings with more than 4294967295 bytes - - byte strings with more than 4294967295 bytes - - arrays with more than 4294967295 elements - - objects with more than 4294967295 elements - - @note Any MessagePack output created @ref to_msgpack can be successfully - parsed by @ref from_msgpack. - - @note If NaN or Infinity are stored inside a JSON number, they are - serialized properly. This behavior differs from the @ref dump() - function which serializes NaN or Infinity to `null`. - - @param[in] j JSON value to serialize - @return MessagePack serialization as byte vector - - @complexity Linear in the size of the JSON value @a j. - - @liveexample{The example shows the serialization of a JSON value to a byte - vector in MessagePack format.,to_msgpack} - - @sa http://msgpack.org - @sa @ref from_msgpack for the analogous deserialization - @sa @ref to_cbor(const basic_json& for the related CBOR format - @sa @ref to_ubjson(const basic_json&, const bool, const bool) for the - related UBJSON format - - @since version 2.0.9 - */ - static std::vector to_msgpack(const basic_json& j) - { - std::vector result; - to_msgpack(j, result); - return result; - } - - static void to_msgpack(const basic_json& j, detail::output_adapter o) - { - binary_writer(o).write_msgpack(j); - } - - static void to_msgpack(const basic_json& j, detail::output_adapter o) - { - binary_writer(o).write_msgpack(j); - } - - /*! - @brief create a UBJSON serialization of a given JSON value - - Serializes a given JSON value @a j to a byte vector using the UBJSON - (Universal Binary JSON) serialization format. UBJSON aims to be more compact - than JSON itself, yet more efficient to parse. - - The library uses the following mapping from JSON values types to - UBJSON types according to the UBJSON specification: - - JSON value type | value/range | UBJSON type | marker - --------------- | --------------------------------- | ----------- | ------ - null | `null` | null | `Z` - boolean | `true` | true | `T` - boolean | `false` | false | `F` - number_integer | -9223372036854775808..-2147483649 | int64 | `L` - number_integer | -2147483648..-32769 | int32 | `l` - number_integer | -32768..-129 | int16 | `I` - number_integer | -128..127 | int8 | `i` - number_integer | 128..255 | uint8 | `U` - number_integer | 256..32767 | int16 | `I` - number_integer | 32768..2147483647 | int32 | `l` - number_integer | 2147483648..9223372036854775807 | int64 | `L` - number_unsigned | 0..127 | int8 | `i` - number_unsigned | 128..255 | uint8 | `U` - number_unsigned | 256..32767 | int16 | `I` - number_unsigned | 32768..2147483647 | int32 | `l` - number_unsigned | 2147483648..9223372036854775807 | int64 | `L` - number_unsigned | 2147483649..18446744073709551615 | high-precision | `H` - number_float | *any value* | float64 | `D` - string | *with shortest length indicator* | string | `S` - array | *see notes on optimized format* | array | `[` - object | *see notes on optimized format* | map | `{` - - @note The mapping is **complete** in the sense that any JSON value type - can be converted to a UBJSON value. - - @note The following values can **not** be converted to a UBJSON value: - - strings with more than 9223372036854775807 bytes (theoretical) - - @note The following markers are not used in the conversion: - - `Z`: no-op values are not created. - - `C`: single-byte strings are serialized with `S` markers. - - @note Any UBJSON output created @ref to_ubjson can be successfully parsed - by @ref from_ubjson. - - @note If NaN or Infinity are stored inside a JSON number, they are - serialized properly. This behavior differs from the @ref dump() - function which serializes NaN or Infinity to `null`. - - @note The optimized formats for containers are supported: Parameter - @a use_size adds size information to the beginning of a container and - removes the closing marker. Parameter @a use_type further checks - whether all elements of a container have the same type and adds the - type marker to the beginning of the container. The @a use_type - parameter must only be used together with @a use_size = true. Note - that @a use_size = true alone may result in larger representations - - the benefit of this parameter is that the receiving side is - immediately informed on the number of elements of the container. - - @note If the JSON data contains the binary type, the value stored is a list - of integers, as suggested by the UBJSON documentation. In particular, - this means that serialization and the deserialization of a JSON - containing binary values into UBJSON and back will result in a - different JSON object. - - @param[in] j JSON value to serialize - @param[in] use_size whether to add size annotations to container types - @param[in] use_type whether to add type annotations to container types - (must be combined with @a use_size = true) - @return UBJSON serialization as byte vector - - @complexity Linear in the size of the JSON value @a j. - - @liveexample{The example shows the serialization of a JSON value to a byte - vector in UBJSON format.,to_ubjson} - - @sa http://ubjson.org - @sa @ref from_ubjson(detail::input_adapter&&, const bool, const bool) for the - analogous deserialization - @sa @ref to_cbor(const basic_json& for the related CBOR format - @sa @ref to_msgpack(const basic_json&) for the related MessagePack format - - @since version 3.1.0 - */ - static std::vector to_ubjson(const basic_json& j, - const bool use_size = false, - const bool use_type = false) - { - std::vector result; - to_ubjson(j, result, use_size, use_type); - return result; - } - - static void to_ubjson(const basic_json& j, detail::output_adapter o, - const bool use_size = false, const bool use_type = false) - { - binary_writer(o).write_ubjson(j, use_size, use_type); - } - - static void to_ubjson(const basic_json& j, detail::output_adapter o, - const bool use_size = false, const bool use_type = false) - { - binary_writer(o).write_ubjson(j, use_size, use_type); - } - - - /*! - @brief Serializes the given JSON object `j` to BSON and returns a vector - containing the corresponding BSON-representation. - - BSON (Binary JSON) is a binary format in which zero or more ordered key/value pairs are - stored as a single entity (a so-called document). - - The library uses the following mapping from JSON values types to BSON types: - - JSON value type | value/range | BSON type | marker - --------------- | --------------------------------- | ----------- | ------ - null | `null` | null | 0x0A - boolean | `true`, `false` | boolean | 0x08 - number_integer | -9223372036854775808..-2147483649 | int64 | 0x12 - number_integer | -2147483648..2147483647 | int32 | 0x10 - number_integer | 2147483648..9223372036854775807 | int64 | 0x12 - number_unsigned | 0..2147483647 | int32 | 0x10 - number_unsigned | 2147483648..9223372036854775807 | int64 | 0x12 - number_unsigned | 9223372036854775808..18446744073709551615| -- | -- - number_float | *any value* | double | 0x01 - string | *any value* | string | 0x02 - array | *any value* | document | 0x04 - object | *any value* | document | 0x03 - binary | *any value* | binary | 0x05 - - @warning The mapping is **incomplete**, since only JSON-objects (and things - contained therein) can be serialized to BSON. - Also, integers larger than 9223372036854775807 cannot be serialized to BSON, - and the keys may not contain U+0000, since they are serialized a - zero-terminated c-strings. - - @throw out_of_range.407 if `j.is_number_unsigned() && j.get() > 9223372036854775807` - @throw out_of_range.409 if a key in `j` contains a NULL (U+0000) - @throw type_error.317 if `!j.is_object()` - - @pre The input `j` is required to be an object: `j.is_object() == true`. - - @note Any BSON output created via @ref to_bson can be successfully parsed - by @ref from_bson. - - @param[in] j JSON value to serialize - @return BSON serialization as byte vector - - @complexity Linear in the size of the JSON value @a j. - - @liveexample{The example shows the serialization of a JSON value to a byte - vector in BSON format.,to_bson} - - @sa http://bsonspec.org/spec.html - @sa @ref from_bson(detail::input_adapter&&, const bool strict) for the - analogous deserialization - @sa @ref to_ubjson(const basic_json&, const bool, const bool) for the - related UBJSON format - @sa @ref to_cbor(const basic_json&) for the related CBOR format - @sa @ref to_msgpack(const basic_json&) for the related MessagePack format - */ - static std::vector to_bson(const basic_json& j) - { - std::vector result; - to_bson(j, result); - return result; - } - - /*! - @brief Serializes the given JSON object `j` to BSON and forwards the - corresponding BSON-representation to the given output_adapter `o`. - @param j The JSON object to convert to BSON. - @param o The output adapter that receives the binary BSON representation. - @pre The input `j` shall be an object: `j.is_object() == true` - @sa @ref to_bson(const basic_json&) - */ - static void to_bson(const basic_json& j, detail::output_adapter o) - { - binary_writer(o).write_bson(j); - } - - /*! - @copydoc to_bson(const basic_json&, detail::output_adapter) - */ - static void to_bson(const basic_json& j, detail::output_adapter o) - { - binary_writer(o).write_bson(j); - } - - - /*! - @brief create a JSON value from an input in CBOR format - - Deserializes a given input @a i to a JSON value using the CBOR (Concise - Binary Object Representation) serialization format. - - The library maps CBOR types to JSON value types as follows: - - CBOR type | JSON value type | first byte - ---------------------- | --------------- | ---------- - Integer | number_unsigned | 0x00..0x17 - Unsigned integer | number_unsigned | 0x18 - Unsigned integer | number_unsigned | 0x19 - Unsigned integer | number_unsigned | 0x1A - Unsigned integer | number_unsigned | 0x1B - Negative integer | number_integer | 0x20..0x37 - Negative integer | number_integer | 0x38 - Negative integer | number_integer | 0x39 - Negative integer | number_integer | 0x3A - Negative integer | number_integer | 0x3B - Byte string | binary | 0x40..0x57 - Byte string | binary | 0x58 - Byte string | binary | 0x59 - Byte string | binary | 0x5A - Byte string | binary | 0x5B - UTF-8 string | string | 0x60..0x77 - UTF-8 string | string | 0x78 - UTF-8 string | string | 0x79 - UTF-8 string | string | 0x7A - UTF-8 string | string | 0x7B - UTF-8 string | string | 0x7F - array | array | 0x80..0x97 - array | array | 0x98 - array | array | 0x99 - array | array | 0x9A - array | array | 0x9B - array | array | 0x9F - map | object | 0xA0..0xB7 - map | object | 0xB8 - map | object | 0xB9 - map | object | 0xBA - map | object | 0xBB - map | object | 0xBF - False | `false` | 0xF4 - True | `true` | 0xF5 - Null | `null` | 0xF6 - Half-Precision Float | number_float | 0xF9 - Single-Precision Float | number_float | 0xFA - Double-Precision Float | number_float | 0xFB - - @warning The mapping is **incomplete** in the sense that not all CBOR - types can be converted to a JSON value. The following CBOR types - are not supported and will yield parse errors (parse_error.112): - - date/time (0xC0..0xC1) - - bignum (0xC2..0xC3) - - decimal fraction (0xC4) - - bigfloat (0xC5) - - expected conversions (0xD5..0xD7) - - simple values (0xE0..0xF3, 0xF8) - - undefined (0xF7) - - @warning CBOR allows map keys of any type, whereas JSON only allows - strings as keys in object values. Therefore, CBOR maps with keys - other than UTF-8 strings are rejected (parse_error.113). - - @note Any CBOR output created @ref to_cbor can be successfully parsed by - @ref from_cbor. - - @param[in] i an input in CBOR format convertible to an input adapter - @param[in] strict whether to expect the input to be consumed until EOF - (true by default) - @param[in] allow_exceptions whether to throw exceptions in case of a - parse error (optional, true by default) - @param[in] tag_handler how to treat CBOR tags (optional, error by default) - - @return deserialized JSON value; in case of a parse error and - @a allow_exceptions set to `false`, the return value will be - value_t::discarded. - - @throw parse_error.110 if the given input ends prematurely or the end of - file was not reached when @a strict was set to true - @throw parse_error.112 if unsupported features from CBOR were - used in the given input @a v or if the input is not valid CBOR - @throw parse_error.113 if a string was expected as map key, but not found - - @complexity Linear in the size of the input @a i. - - @liveexample{The example shows the deserialization of a byte vector in CBOR - format to a JSON value.,from_cbor} - - @sa http://cbor.io - @sa @ref to_cbor(const basic_json&) for the analogous serialization - @sa @ref from_msgpack(detail::input_adapter&&, const bool, const bool) for the - related MessagePack format - @sa @ref from_ubjson(detail::input_adapter&&, const bool, const bool) for the - related UBJSON format - - @since version 2.0.9; parameter @a start_index since 2.1.1; changed to - consume input adapters, removed start_index parameter, and added - @a strict parameter since 3.0.0; added @a allow_exceptions parameter - since 3.2.0; added @a tag_handler parameter since 3.9.0. - */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_cbor(InputType&& i, - const bool strict = true, - const bool allow_exceptions = true, - const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = detail::input_adapter(std::forward(i)); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); - return res ? result : basic_json(value_t::discarded); - } - - /*! - @copydoc from_cbor(detail::input_adapter&&, const bool, const bool, const cbor_tag_handler_t) - */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_cbor(IteratorType first, IteratorType last, - const bool strict = true, - const bool allow_exceptions = true, - const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = detail::input_adapter(std::move(first), std::move(last)); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); - return res ? result : basic_json(value_t::discarded); - } - - template - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, from_cbor(ptr, ptr + len)) - static basic_json from_cbor(const T* ptr, std::size_t len, - const bool strict = true, - const bool allow_exceptions = true, - const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) - { - return from_cbor(ptr, ptr + len, strict, allow_exceptions, tag_handler); - } - - - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, from_cbor(ptr, ptr + len)) - static basic_json from_cbor(detail::span_input_adapter&& i, - const bool strict = true, - const bool allow_exceptions = true, - const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = i.get(); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); - return res ? result : basic_json(value_t::discarded); - } - - /*! - @brief create a JSON value from an input in MessagePack format - - Deserializes a given input @a i to a JSON value using the MessagePack - serialization format. - - The library maps MessagePack types to JSON value types as follows: - - MessagePack type | JSON value type | first byte - ---------------- | --------------- | ---------- - positive fixint | number_unsigned | 0x00..0x7F - fixmap | object | 0x80..0x8F - fixarray | array | 0x90..0x9F - fixstr | string | 0xA0..0xBF - nil | `null` | 0xC0 - false | `false` | 0xC2 - true | `true` | 0xC3 - float 32 | number_float | 0xCA - float 64 | number_float | 0xCB - uint 8 | number_unsigned | 0xCC - uint 16 | number_unsigned | 0xCD - uint 32 | number_unsigned | 0xCE - uint 64 | number_unsigned | 0xCF - int 8 | number_integer | 0xD0 - int 16 | number_integer | 0xD1 - int 32 | number_integer | 0xD2 - int 64 | number_integer | 0xD3 - str 8 | string | 0xD9 - str 16 | string | 0xDA - str 32 | string | 0xDB - array 16 | array | 0xDC - array 32 | array | 0xDD - map 16 | object | 0xDE - map 32 | object | 0xDF - bin 8 | binary | 0xC4 - bin 16 | binary | 0xC5 - bin 32 | binary | 0xC6 - ext 8 | binary | 0xC7 - ext 16 | binary | 0xC8 - ext 32 | binary | 0xC9 - fixext 1 | binary | 0xD4 - fixext 2 | binary | 0xD5 - fixext 4 | binary | 0xD6 - fixext 8 | binary | 0xD7 - fixext 16 | binary | 0xD8 - negative fixint | number_integer | 0xE0-0xFF - - @note Any MessagePack output created @ref to_msgpack can be successfully - parsed by @ref from_msgpack. - - @param[in] i an input in MessagePack format convertible to an input - adapter - @param[in] strict whether to expect the input to be consumed until EOF - (true by default) - @param[in] allow_exceptions whether to throw exceptions in case of a - parse error (optional, true by default) - - @return deserialized JSON value; in case of a parse error and - @a allow_exceptions set to `false`, the return value will be - value_t::discarded. - - @throw parse_error.110 if the given input ends prematurely or the end of - file was not reached when @a strict was set to true - @throw parse_error.112 if unsupported features from MessagePack were - used in the given input @a i or if the input is not valid MessagePack - @throw parse_error.113 if a string was expected as map key, but not found - - @complexity Linear in the size of the input @a i. - - @liveexample{The example shows the deserialization of a byte vector in - MessagePack format to a JSON value.,from_msgpack} - - @sa http://msgpack.org - @sa @ref to_msgpack(const basic_json&) for the analogous serialization - @sa @ref from_cbor(detail::input_adapter&&, const bool, const bool, const cbor_tag_handler_t) for the - related CBOR format - @sa @ref from_ubjson(detail::input_adapter&&, const bool, const bool) for - the related UBJSON format - @sa @ref from_bson(detail::input_adapter&&, const bool, const bool) for - the related BSON format - - @since version 2.0.9; parameter @a start_index since 2.1.1; changed to - consume input adapters, removed start_index parameter, and added - @a strict parameter since 3.0.0; added @a allow_exceptions parameter - since 3.2.0 - */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_msgpack(InputType&& i, - const bool strict = true, - const bool allow_exceptions = true) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = detail::input_adapter(std::forward(i)); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::msgpack, &sdp, strict); - return res ? result : basic_json(value_t::discarded); - } - - /*! - @copydoc from_msgpack(detail::input_adapter&&, const bool, const bool) - */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_msgpack(IteratorType first, IteratorType last, - const bool strict = true, - const bool allow_exceptions = true) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = detail::input_adapter(std::move(first), std::move(last)); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::msgpack, &sdp, strict); - return res ? result : basic_json(value_t::discarded); - } - - - template - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, from_msgpack(ptr, ptr + len)) - static basic_json from_msgpack(const T* ptr, std::size_t len, - const bool strict = true, - const bool allow_exceptions = true) - { - return from_msgpack(ptr, ptr + len, strict, allow_exceptions); - } - - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, from_msgpack(ptr, ptr + len)) - static basic_json from_msgpack(detail::span_input_adapter&& i, - const bool strict = true, - const bool allow_exceptions = true) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = i.get(); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::msgpack, &sdp, strict); - return res ? result : basic_json(value_t::discarded); - } - - - /*! - @brief create a JSON value from an input in UBJSON format - - Deserializes a given input @a i to a JSON value using the UBJSON (Universal - Binary JSON) serialization format. - - The library maps UBJSON types to JSON value types as follows: - - UBJSON type | JSON value type | marker - ----------- | --------------------------------------- | ------ - no-op | *no value, next value is read* | `N` - null | `null` | `Z` - false | `false` | `F` - true | `true` | `T` - float32 | number_float | `d` - float64 | number_float | `D` - uint8 | number_unsigned | `U` - int8 | number_integer | `i` - int16 | number_integer | `I` - int32 | number_integer | `l` - int64 | number_integer | `L` - high-precision number | number_integer, number_unsigned, or number_float - depends on number string | 'H' - string | string | `S` - char | string | `C` - array | array (optimized values are supported) | `[` - object | object (optimized values are supported) | `{` - - @note The mapping is **complete** in the sense that any UBJSON value can - be converted to a JSON value. - - @param[in] i an input in UBJSON format convertible to an input adapter - @param[in] strict whether to expect the input to be consumed until EOF - (true by default) - @param[in] allow_exceptions whether to throw exceptions in case of a - parse error (optional, true by default) - - @return deserialized JSON value; in case of a parse error and - @a allow_exceptions set to `false`, the return value will be - value_t::discarded. - - @throw parse_error.110 if the given input ends prematurely or the end of - file was not reached when @a strict was set to true - @throw parse_error.112 if a parse error occurs - @throw parse_error.113 if a string could not be parsed successfully - - @complexity Linear in the size of the input @a i. - - @liveexample{The example shows the deserialization of a byte vector in - UBJSON format to a JSON value.,from_ubjson} - - @sa http://ubjson.org - @sa @ref to_ubjson(const basic_json&, const bool, const bool) for the - analogous serialization - @sa @ref from_cbor(detail::input_adapter&&, const bool, const bool, const cbor_tag_handler_t) for the - related CBOR format - @sa @ref from_msgpack(detail::input_adapter&&, const bool, const bool) for - the related MessagePack format - @sa @ref from_bson(detail::input_adapter&&, const bool, const bool) for - the related BSON format - - @since version 3.1.0; added @a allow_exceptions parameter since 3.2.0 - */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_ubjson(InputType&& i, - const bool strict = true, - const bool allow_exceptions = true) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = detail::input_adapter(std::forward(i)); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::ubjson, &sdp, strict); - return res ? result : basic_json(value_t::discarded); - } - - /*! - @copydoc from_ubjson(detail::input_adapter&&, const bool, const bool) - */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_ubjson(IteratorType first, IteratorType last, - const bool strict = true, - const bool allow_exceptions = true) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = detail::input_adapter(std::move(first), std::move(last)); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::ubjson, &sdp, strict); - return res ? result : basic_json(value_t::discarded); - } - - template - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, from_ubjson(ptr, ptr + len)) - static basic_json from_ubjson(const T* ptr, std::size_t len, - const bool strict = true, - const bool allow_exceptions = true) - { - return from_ubjson(ptr, ptr + len, strict, allow_exceptions); - } - - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, from_ubjson(ptr, ptr + len)) - static basic_json from_ubjson(detail::span_input_adapter&& i, - const bool strict = true, - const bool allow_exceptions = true) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = i.get(); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::ubjson, &sdp, strict); - return res ? result : basic_json(value_t::discarded); - } - - - /*! - @brief Create a JSON value from an input in BSON format - - Deserializes a given input @a i to a JSON value using the BSON (Binary JSON) - serialization format. - - The library maps BSON record types to JSON value types as follows: - - BSON type | BSON marker byte | JSON value type - --------------- | ---------------- | --------------------------- - double | 0x01 | number_float - string | 0x02 | string - document | 0x03 | object - array | 0x04 | array - binary | 0x05 | still unsupported - undefined | 0x06 | still unsupported - ObjectId | 0x07 | still unsupported - boolean | 0x08 | boolean - UTC Date-Time | 0x09 | still unsupported - null | 0x0A | null - Regular Expr. | 0x0B | still unsupported - DB Pointer | 0x0C | still unsupported - JavaScript Code | 0x0D | still unsupported - Symbol | 0x0E | still unsupported - JavaScript Code | 0x0F | still unsupported - int32 | 0x10 | number_integer - Timestamp | 0x11 | still unsupported - 128-bit decimal float | 0x13 | still unsupported - Max Key | 0x7F | still unsupported - Min Key | 0xFF | still unsupported - - @warning The mapping is **incomplete**. The unsupported mappings - are indicated in the table above. - - @param[in] i an input in BSON format convertible to an input adapter - @param[in] strict whether to expect the input to be consumed until EOF - (true by default) - @param[in] allow_exceptions whether to throw exceptions in case of a - parse error (optional, true by default) - - @return deserialized JSON value; in case of a parse error and - @a allow_exceptions set to `false`, the return value will be - value_t::discarded. - - @throw parse_error.114 if an unsupported BSON record type is encountered - - @complexity Linear in the size of the input @a i. - - @liveexample{The example shows the deserialization of a byte vector in - BSON format to a JSON value.,from_bson} - - @sa http://bsonspec.org/spec.html - @sa @ref to_bson(const basic_json&) for the analogous serialization - @sa @ref from_cbor(detail::input_adapter&&, const bool, const bool, const cbor_tag_handler_t) for the - related CBOR format - @sa @ref from_msgpack(detail::input_adapter&&, const bool, const bool) for - the related MessagePack format - @sa @ref from_ubjson(detail::input_adapter&&, const bool, const bool) for the - related UBJSON format - */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_bson(InputType&& i, - const bool strict = true, - const bool allow_exceptions = true) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = detail::input_adapter(std::forward(i)); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::bson, &sdp, strict); - return res ? result : basic_json(value_t::discarded); - } - - /*! - @copydoc from_bson(detail::input_adapter&&, const bool, const bool) - */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_bson(IteratorType first, IteratorType last, - const bool strict = true, - const bool allow_exceptions = true) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = detail::input_adapter(std::move(first), std::move(last)); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::bson, &sdp, strict); - return res ? result : basic_json(value_t::discarded); - } - - template - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, from_bson(ptr, ptr + len)) - static basic_json from_bson(const T* ptr, std::size_t len, - const bool strict = true, - const bool allow_exceptions = true) - { - return from_bson(ptr, ptr + len, strict, allow_exceptions); - } - - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, from_bson(ptr, ptr + len)) - static basic_json from_bson(detail::span_input_adapter&& i, - const bool strict = true, - const bool allow_exceptions = true) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = i.get(); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::bson, &sdp, strict); - return res ? result : basic_json(value_t::discarded); - } - /// @} - - ////////////////////////// - // JSON Pointer support // - ////////////////////////// - - /// @name JSON Pointer functions - /// @{ - - /*! - @brief access specified element via JSON Pointer - - Uses a JSON pointer to retrieve a reference to the respective JSON value. - No bound checking is performed. Similar to @ref operator[](const typename - object_t::key_type&), `null` values are created in arrays and objects if - necessary. - - In particular: - - If the JSON pointer points to an object key that does not exist, it - is created an filled with a `null` value before a reference to it - is returned. - - If the JSON pointer points to an array index that does not exist, it - is created an filled with a `null` value before a reference to it - is returned. All indices between the current maximum and the given - index are also filled with `null`. - - The special value `-` is treated as a synonym for the index past the - end. - - @param[in] ptr a JSON pointer - - @return reference to the element pointed to by @a ptr - - @complexity Constant. - - @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - @throw out_of_range.404 if the JSON pointer can not be resolved - - @liveexample{The behavior is shown in the example.,operatorjson_pointer} - - @since version 2.0.0 - */ - reference operator[](const json_pointer& ptr) - { - return ptr.get_unchecked(this); - } - - /*! - @brief access specified element via JSON Pointer - - Uses a JSON pointer to retrieve a reference to the respective JSON value. - No bound checking is performed. The function does not change the JSON - value; no `null` values are created. In particular, the special value - `-` yields an exception. - - @param[in] ptr JSON pointer to the desired element - - @return const reference to the element pointed to by @a ptr - - @complexity Constant. - - @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - @throw out_of_range.402 if the array index '-' is used - @throw out_of_range.404 if the JSON pointer can not be resolved - - @liveexample{The behavior is shown in the example.,operatorjson_pointer_const} - - @since version 2.0.0 - */ - const_reference operator[](const json_pointer& ptr) const - { - return ptr.get_unchecked(this); - } - - /*! - @brief access specified element via JSON Pointer - - Returns a reference to the element at with specified JSON pointer @a ptr, - with bounds checking. - - @param[in] ptr JSON pointer to the desired element - - @return reference to the element pointed to by @a ptr - - @throw parse_error.106 if an array index in the passed JSON pointer @a ptr - begins with '0'. See example below. - - @throw parse_error.109 if an array index in the passed JSON pointer @a ptr - is not a number. See example below. - - @throw out_of_range.401 if an array index in the passed JSON pointer @a ptr - is out of range. See example below. - - @throw out_of_range.402 if the array index '-' is used in the passed JSON - pointer @a ptr. As `at` provides checked access (and no elements are - implicitly inserted), the index '-' is always invalid. See example below. - - @throw out_of_range.403 if the JSON pointer describes a key of an object - which cannot be found. See example below. - - @throw out_of_range.404 if the JSON pointer @a ptr can not be resolved. - See example below. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Constant. - - @since version 2.0.0 - - @liveexample{The behavior is shown in the example.,at_json_pointer} - */ - reference at(const json_pointer& ptr) - { - return ptr.get_checked(this); - } - - /*! - @brief access specified element via JSON Pointer - - Returns a const reference to the element at with specified JSON pointer @a - ptr, with bounds checking. - - @param[in] ptr JSON pointer to the desired element - - @return reference to the element pointed to by @a ptr - - @throw parse_error.106 if an array index in the passed JSON pointer @a ptr - begins with '0'. See example below. - - @throw parse_error.109 if an array index in the passed JSON pointer @a ptr - is not a number. See example below. - - @throw out_of_range.401 if an array index in the passed JSON pointer @a ptr - is out of range. See example below. - - @throw out_of_range.402 if the array index '-' is used in the passed JSON - pointer @a ptr. As `at` provides checked access (and no elements are - implicitly inserted), the index '-' is always invalid. See example below. - - @throw out_of_range.403 if the JSON pointer describes a key of an object - which cannot be found. See example below. - - @throw out_of_range.404 if the JSON pointer @a ptr can not be resolved. - See example below. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Constant. - - @since version 2.0.0 - - @liveexample{The behavior is shown in the example.,at_json_pointer_const} - */ - const_reference at(const json_pointer& ptr) const - { - return ptr.get_checked(this); - } - - /*! - @brief return flattened JSON value - - The function creates a JSON object whose keys are JSON pointers (see [RFC - 6901](https://tools.ietf.org/html/rfc6901)) and whose values are all - primitive. The original JSON value can be restored using the @ref - unflatten() function. - - @return an object that maps JSON pointers to primitive values - - @note Empty objects and arrays are flattened to `null` and will not be - reconstructed correctly by the @ref unflatten() function. - - @complexity Linear in the size the JSON value. - - @liveexample{The following code shows how a JSON object is flattened to an - object whose keys consist of JSON pointers.,flatten} - - @sa @ref unflatten() for the reverse function - - @since version 2.0.0 - */ - basic_json flatten() const - { - basic_json result(value_t::object); - json_pointer::flatten("", *this, result); - return result; - } - - /*! - @brief unflatten a previously flattened JSON value - - The function restores the arbitrary nesting of a JSON value that has been - flattened before using the @ref flatten() function. The JSON value must - meet certain constraints: - 1. The value must be an object. - 2. The keys must be JSON pointers (see - [RFC 6901](https://tools.ietf.org/html/rfc6901)) - 3. The mapped values must be primitive JSON types. - - @return the original JSON from a flattened version - - @note Empty objects and arrays are flattened by @ref flatten() to `null` - values and can not unflattened to their original type. Apart from - this example, for a JSON value `j`, the following is always true: - `j == j.flatten().unflatten()`. - - @complexity Linear in the size the JSON value. - - @throw type_error.314 if value is not an object - @throw type_error.315 if object values are not primitive - - @liveexample{The following code shows how a flattened JSON object is - unflattened into the original nested JSON object.,unflatten} - - @sa @ref flatten() for the reverse function - - @since version 2.0.0 - */ - basic_json unflatten() const - { - return json_pointer::unflatten(*this); - } - - /// @} - - ////////////////////////// - // JSON Patch functions // - ////////////////////////// - - /// @name JSON Patch functions - /// @{ - - /*! - @brief applies a JSON patch - - [JSON Patch](http://jsonpatch.com) defines a JSON document structure for - expressing a sequence of operations to apply to a JSON) document. With - this function, a JSON Patch is applied to the current JSON value by - executing all operations from the patch. - - @param[in] json_patch JSON patch document - @return patched document - - @note The application of a patch is atomic: Either all operations succeed - and the patched document is returned or an exception is thrown. In - any case, the original value is not changed: the patch is applied - to a copy of the value. - - @throw parse_error.104 if the JSON patch does not consist of an array of - objects - - @throw parse_error.105 if the JSON patch is malformed (e.g., mandatory - attributes are missing); example: `"operation add must have member path"` - - @throw out_of_range.401 if an array index is out of range. - - @throw out_of_range.403 if a JSON pointer inside the patch could not be - resolved successfully in the current JSON value; example: `"key baz not - found"` - - @throw out_of_range.405 if JSON pointer has no parent ("add", "remove", - "move") - - @throw other_error.501 if "test" operation was unsuccessful - - @complexity Linear in the size of the JSON value and the length of the - JSON patch. As usually only a fraction of the JSON value is affected by - the patch, the complexity can usually be neglected. - - @liveexample{The following code shows how a JSON patch is applied to a - value.,patch} - - @sa @ref diff -- create a JSON patch by comparing two JSON values - - @sa [RFC 6902 (JSON Patch)](https://tools.ietf.org/html/rfc6902) - @sa [RFC 6901 (JSON Pointer)](https://tools.ietf.org/html/rfc6901) - - @since version 2.0.0 - */ - basic_json patch(const basic_json& json_patch) const - { - // make a working copy to apply the patch to - basic_json result = *this; - - // the valid JSON Patch operations - enum class patch_operations {add, remove, replace, move, copy, test, invalid}; - - const auto get_op = [](const std::string & op) - { - if (op == "add") - { - return patch_operations::add; - } - if (op == "remove") - { - return patch_operations::remove; - } - if (op == "replace") - { - return patch_operations::replace; - } - if (op == "move") - { - return patch_operations::move; - } - if (op == "copy") - { - return patch_operations::copy; - } - if (op == "test") - { - return patch_operations::test; - } - - return patch_operations::invalid; - }; - - // wrapper for "add" operation; add value at ptr - const auto operation_add = [&result](json_pointer & ptr, basic_json val) - { - // adding to the root of the target document means replacing it - if (ptr.empty()) - { - result = val; - return; - } - - // make sure the top element of the pointer exists - json_pointer top_pointer = ptr.top(); - if (top_pointer != ptr) - { - result.at(top_pointer); - } - - // get reference to parent of JSON pointer ptr - const auto last_path = ptr.back(); - ptr.pop_back(); - basic_json& parent = result[ptr]; - - switch (parent.m_type) - { - case value_t::null: - case value_t::object: - { - // use operator[] to add value - parent[last_path] = val; - break; - } - - case value_t::array: - { - if (last_path == "-") - { - // special case: append to back - parent.push_back(val); - } - else - { - const auto idx = json_pointer::array_index(last_path); - if (JSON_HEDLEY_UNLIKELY(idx > parent.size())) - { - // avoid undefined behavior - JSON_THROW(out_of_range::create(401, "array index " + std::to_string(idx) + " is out of range")); - } - - // default case: insert add offset - parent.insert(parent.begin() + static_cast(idx), val); - } - break; - } - - // if there exists a parent it cannot be primitive - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // LCOV_EXCL_LINE - } - }; - - // wrapper for "remove" operation; remove value at ptr - const auto operation_remove = [&result](json_pointer & ptr) - { - // get reference to parent of JSON pointer ptr - const auto last_path = ptr.back(); - ptr.pop_back(); - basic_json& parent = result.at(ptr); - - // remove child - if (parent.is_object()) - { - // perform range check - auto it = parent.find(last_path); - if (JSON_HEDLEY_LIKELY(it != parent.end())) - { - parent.erase(it); - } - else - { - JSON_THROW(out_of_range::create(403, "key '" + last_path + "' not found")); - } - } - else if (parent.is_array()) - { - // note erase performs range check - parent.erase(json_pointer::array_index(last_path)); - } - }; - - // type check: top level value must be an array - if (JSON_HEDLEY_UNLIKELY(!json_patch.is_array())) - { - JSON_THROW(parse_error::create(104, 0, "JSON patch must be an array of objects")); - } - - // iterate and apply the operations - for (const auto& val : json_patch) - { - // wrapper to get a value for an operation - const auto get_value = [&val](const std::string & op, - const std::string & member, - bool string_type) -> basic_json & - { - // find value - auto it = val.m_value.object->find(member); - - // context-sensitive error message - const auto error_msg = (op == "op") ? "operation" : "operation '" + op + "'"; - - // check if desired value is present - if (JSON_HEDLEY_UNLIKELY(it == val.m_value.object->end())) - { - JSON_THROW(parse_error::create(105, 0, error_msg + " must have member '" + member + "'")); - } - - // check if result is of type string - if (JSON_HEDLEY_UNLIKELY(string_type && !it->second.is_string())) - { - JSON_THROW(parse_error::create(105, 0, error_msg + " must have string member '" + member + "'")); - } - - // no error: return value - return it->second; - }; - - // type check: every element of the array must be an object - if (JSON_HEDLEY_UNLIKELY(!val.is_object())) - { - JSON_THROW(parse_error::create(104, 0, "JSON patch must be an array of objects")); - } - - // collect mandatory members - const auto op = get_value("op", "op", true).template get(); - const auto path = get_value(op, "path", true).template get(); - json_pointer ptr(path); - - switch (get_op(op)) - { - case patch_operations::add: - { - operation_add(ptr, get_value("add", "value", false)); - break; - } - - case patch_operations::remove: - { - operation_remove(ptr); - break; - } - - case patch_operations::replace: - { - // the "path" location must exist - use at() - result.at(ptr) = get_value("replace", "value", false); - break; - } - - case patch_operations::move: - { - const auto from_path = get_value("move", "from", true).template get(); - json_pointer from_ptr(from_path); - - // the "from" location must exist - use at() - basic_json v = result.at(from_ptr); - - // The move operation is functionally identical to a - // "remove" operation on the "from" location, followed - // immediately by an "add" operation at the target - // location with the value that was just removed. - operation_remove(from_ptr); - operation_add(ptr, v); - break; - } - - case patch_operations::copy: - { - const auto from_path = get_value("copy", "from", true).template get(); - const json_pointer from_ptr(from_path); - - // the "from" location must exist - use at() - basic_json v = result.at(from_ptr); - - // The copy is functionally identical to an "add" - // operation at the target location using the value - // specified in the "from" member. - operation_add(ptr, v); - break; - } - - case patch_operations::test: - { - bool success = false; - JSON_TRY - { - // check if "value" matches the one at "path" - // the "path" location must exist - use at() - success = (result.at(ptr) == get_value("test", "value", false)); - } - JSON_INTERNAL_CATCH (out_of_range&) - { - // ignore out of range errors: success remains false - } - - // throw an exception if test fails - if (JSON_HEDLEY_UNLIKELY(!success)) - { - JSON_THROW(other_error::create(501, "unsuccessful: " + val.dump())); - } - - break; - } - - default: - { - // op must be "add", "remove", "replace", "move", "copy", or - // "test" - JSON_THROW(parse_error::create(105, 0, "operation value '" + op + "' is invalid")); - } - } - } - - return result; - } - - /*! - @brief creates a diff as a JSON patch - - Creates a [JSON Patch](http://jsonpatch.com) so that value @a source can - be changed into the value @a target by calling @ref patch function. - - @invariant For two JSON values @a source and @a target, the following code - yields always `true`: - @code {.cpp} - source.patch(diff(source, target)) == target; - @endcode - - @note Currently, only `remove`, `add`, and `replace` operations are - generated. - - @param[in] source JSON value to compare from - @param[in] target JSON value to compare against - @param[in] path helper value to create JSON pointers - - @return a JSON patch to convert the @a source to @a target - - @complexity Linear in the lengths of @a source and @a target. - - @liveexample{The following code shows how a JSON patch is created as a - diff for two JSON values.,diff} - - @sa @ref patch -- apply a JSON patch - @sa @ref merge_patch -- apply a JSON Merge Patch - - @sa [RFC 6902 (JSON Patch)](https://tools.ietf.org/html/rfc6902) - - @since version 2.0.0 - */ - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json diff(const basic_json& source, const basic_json& target, - const std::string& path = "") - { - // the patch - basic_json result(value_t::array); - - // if the values are the same, return empty patch - if (source == target) - { - return result; - } - - if (source.type() != target.type()) - { - // different types: replace value - result.push_back( - { - {"op", "replace"}, {"path", path}, {"value", target} - }); - return result; - } - - switch (source.type()) - { - case value_t::array: - { - // first pass: traverse common elements - std::size_t i = 0; - while (i < source.size() && i < target.size()) - { - // recursive call to compare array values at index i - auto temp_diff = diff(source[i], target[i], path + "/" + std::to_string(i)); - result.insert(result.end(), temp_diff.begin(), temp_diff.end()); - ++i; - } - - // i now reached the end of at least one array - // in a second pass, traverse the remaining elements - - // remove my remaining elements - const auto end_index = static_cast(result.size()); - while (i < source.size()) - { - // add operations in reverse order to avoid invalid - // indices - result.insert(result.begin() + end_index, object( - { - {"op", "remove"}, - {"path", path + "/" + std::to_string(i)} - })); - ++i; - } - - // add other remaining elements - while (i < target.size()) - { - result.push_back( - { - {"op", "add"}, - {"path", path + "/-"}, - {"value", target[i]} - }); - ++i; - } - - break; - } - - case value_t::object: - { - // first pass: traverse this object's elements - for (auto it = source.cbegin(); it != source.cend(); ++it) - { - // escape the key name to be used in a JSON patch - const auto key = json_pointer::escape(it.key()); - - if (target.find(it.key()) != target.end()) - { - // recursive call to compare object values at key it - auto temp_diff = diff(it.value(), target[it.key()], path + "/" + key); - result.insert(result.end(), temp_diff.begin(), temp_diff.end()); - } - else - { - // found a key that is not in o -> remove it - result.push_back(object( - { - {"op", "remove"}, {"path", path + "/" + key} - })); - } - } - - // second pass: traverse other object's elements - for (auto it = target.cbegin(); it != target.cend(); ++it) - { - if (source.find(it.key()) == source.end()) - { - // found a key that is not in this -> add it - const auto key = json_pointer::escape(it.key()); - result.push_back( - { - {"op", "add"}, {"path", path + "/" + key}, - {"value", it.value()} - }); - } - } - - break; - } - - default: - { - // both primitive type: replace value - result.push_back( - { - {"op", "replace"}, {"path", path}, {"value", target} - }); - break; - } - } - - return result; - } - - /// @} - - //////////////////////////////// - // JSON Merge Patch functions // - //////////////////////////////// - - /// @name JSON Merge Patch functions - /// @{ - - /*! - @brief applies a JSON Merge Patch - - The merge patch format is primarily intended for use with the HTTP PATCH - method as a means of describing a set of modifications to a target - resource's content. This function applies a merge patch to the current - JSON value. - - The function implements the following algorithm from Section 2 of - [RFC 7396 (JSON Merge Patch)](https://tools.ietf.org/html/rfc7396): - - ``` - define MergePatch(Target, Patch): - if Patch is an Object: - if Target is not an Object: - Target = {} // Ignore the contents and set it to an empty Object - for each Name/Value pair in Patch: - if Value is null: - if Name exists in Target: - remove the Name/Value pair from Target - else: - Target[Name] = MergePatch(Target[Name], Value) - return Target - else: - return Patch - ``` - - Thereby, `Target` is the current object; that is, the patch is applied to - the current value. - - @param[in] apply_patch the patch to apply - - @complexity Linear in the lengths of @a patch. - - @liveexample{The following code shows how a JSON Merge Patch is applied to - a JSON document.,merge_patch} - - @sa @ref patch -- apply a JSON patch - @sa [RFC 7396 (JSON Merge Patch)](https://tools.ietf.org/html/rfc7396) - - @since version 3.0.0 - */ - void merge_patch(const basic_json& apply_patch) - { - if (apply_patch.is_object()) - { - if (!is_object()) - { - *this = object(); - } - for (auto it = apply_patch.begin(); it != apply_patch.end(); ++it) - { - if (it.value().is_null()) - { - erase(it.key()); - } - else - { - operator[](it.key()).merge_patch(it.value()); - } - } - } - else - { - *this = apply_patch; - } - } - - /// @} -}; - -/*! -@brief user-defined to_string function for JSON values - -This function implements a user-defined to_string for JSON objects. - -@param[in] j a JSON object -@return a std::string object -*/ - -NLOHMANN_BASIC_JSON_TPL_DECLARATION -std::string to_string(const NLOHMANN_BASIC_JSON_TPL& j) -{ - return j.dump(); -} -} // namespace nlohmann - -/////////////////////// -// nonmember support // -/////////////////////// - -// specialization of std::swap, and std::hash -namespace std -{ - -/// hash value for JSON objects -template<> -struct hash -{ - /*! - @brief return a hash value for a JSON object - - @since version 1.0.0 - */ - std::size_t operator()(const nlohmann::json& j) const - { - return nlohmann::detail::hash(j); - } -}; - -/// specialization for std::less -/// @note: do not remove the space after '<', -/// see https://github.com/nlohmann/json/pull/679 -template<> -struct less<::nlohmann::detail::value_t> -{ - /*! - @brief compare two value_t enum values - @since version 3.0.0 - */ - bool operator()(nlohmann::detail::value_t lhs, - nlohmann::detail::value_t rhs) const noexcept - { - return nlohmann::detail::operator<(lhs, rhs); - } -}; - -// C++20 prohibit function specialization in the std namespace. -#ifndef JSON_HAS_CPP_20 - -/*! -@brief exchanges the values of two JSON objects - -@since version 1.0.0 -*/ -template<> -inline void swap(nlohmann::json& j1, nlohmann::json& j2) noexcept( - is_nothrow_move_constructible::value&& - is_nothrow_move_assignable::value -) -{ - j1.swap(j2); -} - -#endif - -} // namespace std - -/*! -@brief user-defined string literal for JSON values - -This operator implements a user-defined string literal for JSON objects. It -can be used by adding `"_json"` to a string literal and returns a JSON object -if no parse error occurred. - -@param[in] s a string representation of a JSON object -@param[in] n the length of string @a s -@return a JSON object - -@since version 1.0.0 -*/ -// Work around compiler bug in nvcc 11.0, see NVIDIA/NVBench#18 -#if defined(__NVCC__) && \ - __cplusplus >= 201703L && \ - __CUDACC_VER_MAJOR__ == 11 && \ - __CUDACC_VER_MINOR__ == 0 - -#else -JSON_HEDLEY_NON_NULL(1) -inline nlohmann::json operator "" _json(const char* s, std::size_t n) -{ - return nlohmann::json::parse(s, s + n); -} -#endif - -/*! -@brief user-defined string literal for JSON pointer - -This operator implements a user-defined string literal for JSON Pointers. It -can be used by adding `"_json_pointer"` to a string literal and returns a JSON pointer -object if no parse error occurred. - -@param[in] s a string representation of a JSON Pointer -@param[in] n the length of string @a s -@return a JSON pointer object - -@since version 2.0.0 -*/ -JSON_HEDLEY_NON_NULL(1) -inline nlohmann::json::json_pointer operator "" _json_pointer(const char* s, std::size_t n) -{ - return nlohmann::json::json_pointer(std::string(s, n)); -} - -#include - -#endif // INCLUDE_NLOHMANN_JSON_HPP_ diff --git a/docs/benchmarks.md b/docs/benchmarks.md index ef9fb48f..dfd7b07e 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -188,7 +188,7 @@ void my_benchmark(nvbench::state& state, nvbench::type_list) } using my_types = nvbench::type_list; NVBENCH_BENCH_TYPES(my_benchmark, NVBENCH_TYPE_AXES(my_types)) - .set_type_axis_names({"ValueType"}); + .set_type_axes_names({"ValueType"}); ``` The `NVBENCH_TYPE_AXES` macro is unfortunately necessary to prevent commas in diff --git a/docs/cli_help.md b/docs/cli_help.md index 8629e8f5..0336c5ed 100644 --- a/docs/cli_help.md +++ b/docs/cli_help.md @@ -89,8 +89,15 @@ * Applies to the most recent `--benchmark`, or all benchmarks if specified before any `--benchmark` arguments. +* `--stopping-criterion ` + * After `--min-samples` is satisfied, use `` to detect if enough + samples were collected. + * Only applies to Cold measurements. + * Default is stdrel (`--stopping-criterion stdrel`) + * `--min-time ` * Accumulate at least `` of execution time per measurement. + * Only applies to `stdrel` stopping criterion. * Default is 0.5 seconds. * If both GPU and CPU times are gathered, this applies to GPU time only. * Applies to the most recent `--benchmark`, or all benchmarks if specified @@ -100,6 +107,7 @@ * Gather samples until the error in the measurement drops below ``. * Noise is specified as the percent relative standard deviation. * Default is 0.5% (`--max-noise 0.5`) + * Only applies to `stdrel` stopping criterion. * Only applies to Cold measurements. * If both GPU and CPU times are gathered, this applies to GPU noise only. * Applies to the most recent `--benchmark`, or all benchmarks if specified diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 219fc898..a98bcbeb 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,51 +1,71 @@ set(example_srcs + auto_throughput.cu axes.cu + custom_criterion.cu enums.cu exec_tag_sync.cu exec_tag_timer.cu skip.cu stream.cu throughput.cu - auto_throughput.cu ) # Metatarget for all examples: add_custom_target(nvbench.example.all) add_dependencies(nvbench.all nvbench.example.all) -foreach(example_src IN LISTS example_srcs) - get_filename_component(example_name "${example_src}" NAME_WLE) - string(PREPEND example_name "nvbench.example.") - add_executable(${example_name} "${example_src}") - nvbench_config_target(${example_name}) - target_include_directories(${example_name} PRIVATE "${CMAKE_CURRENT_LIST_DIR}") - target_link_libraries(${example_name} PRIVATE nvbench::main) - set_target_properties(${example_name} PROPERTIES COMPILE_FEATURES cuda_std_17) - add_test(NAME ${example_name} - COMMAND "$" --timeout 0.1 --min-time 1e-5 - ) - add_dependencies(nvbench.example.all ${example_name}) -endforeach() +function (nvbench_add_examples_target target_prefix cuda_std) + add_custom_target(${target_prefix}.all) + add_dependencies(nvbench.example.all ${target_prefix}.all) -# Silence some warnings from old thrust headers: -set(thrust_examples - auto_throughput - axes - exec_tag_sync - exec_tag_timer - skip - throughput -) -foreach (example IN LISTS thrust_examples) - if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - # C4324: structure was padded due to alignment specifier - nvbench_add_cxx_flag(nvbench.example.${example} PRIVATE "/wd4324") - - # warning C4201: nonstandard extension used: nameless struct/union: - # Fixed in Thrust 1.12.0 (CTK 11.4, NV HPC 21.3) - if (${CUDAToolkit_VERSION} VERSION_LESS 11.4) - nvbench_add_cxx_flag(nvbench.example.${example} PRIVATE "/wd4201") + foreach(example_src IN LISTS example_srcs) + get_filename_component(example_name "${example_src}" NAME_WLE) + string(PREPEND example_name "${target_prefix}.") + add_executable(${example_name} "${example_src}") + nvbench_config_target(${example_name}) + target_include_directories(${example_name} PRIVATE "${CMAKE_CURRENT_LIST_DIR}") + target_link_libraries(${example_name} PRIVATE nvbench::main) + set_target_properties(${example_name} PROPERTIES COMPILE_FEATURES cuda_std_${cuda_std}) + add_test(NAME ${example_name} + COMMAND "$" --timeout 0.1 --min-time 1e-5 + ) + + # These should not deadlock. If they do, it may be that the CUDA context was created before + # setting CUDA_MODULE_LOAD=EAGER in main, see NVIDIA/nvbench#136. + set_tests_properties(${example_name} PROPERTIES + FAIL_REGULAR_EXPRESSION "Possible Deadlock Detected" + ) + + add_dependencies(${target_prefix}.all ${example_name}) + endforeach() + + # Silence some warnings from old thrust headers: + set(thrust_examples + auto_throughput + axes + custom_criterion + exec_tag_sync + exec_tag_timer + skip + stream + throughput + ) + foreach (example IN LISTS thrust_examples) + if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + # C4324: structure was padded due to alignment specifier + nvbench_add_cxx_flag(${target_prefix}.${example} PRIVATE "/wd4324") + + # warning C4201: nonstandard extension used: nameless struct/union: + # Fixed in Thrust 1.12.0 (CTK 11.4, NV HPC 21.3) + if (${CUDAToolkit_VERSION} VERSION_LESS 11.4) + nvbench_add_cxx_flag(${target_prefix}.${example} PRIVATE "/wd4201") + endif() endif() - endif() + endforeach() +endfunction() + + +foreach (std IN LISTS NVBench_DETECTED_CUDA_STANDARDS) + nvbench_add_examples_target(nvbench.example.cpp${std} ${std}) endforeach() diff --git a/examples/axes.cu b/examples/axes.cu index b8c21152..44ae5988 100644 --- a/examples/axes.cu +++ b/examples/axes.cu @@ -56,8 +56,8 @@ NVBENCH_BENCH(single_float64_axis) void copy_sweep_grid_shape(nvbench::state &state) { // Get current parameters: - const int block_size = static_cast(state.get_int64("BlockSize")); - const int num_blocks = static_cast(state.get_int64("NumBlocks")); + const auto block_size = static_cast(state.get_int64("BlockSize")); + const auto num_blocks = static_cast(state.get_int64("NumBlocks")); // Number of int32s in 256 MiB: const std::size_t num_values = 256 * 1024 * 1024 / sizeof(nvbench::int32_t); @@ -77,6 +77,7 @@ void copy_sweep_grid_shape(nvbench::state &state) num_values, in_ptr = thrust::raw_pointer_cast(in.data()), out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) { + (void) num_values; // clang thinks this is unused... nvbench::copy_kernel<<>>( in_ptr, out_ptr, @@ -110,6 +111,7 @@ void copy_type_sweep(nvbench::state &state, nvbench::type_list) [num_values, in_ptr = thrust::raw_pointer_cast(in.data()), out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) { + (void) num_values; // clang thinks this is unused... nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr, out_ptr, num_values); @@ -133,7 +135,7 @@ void copy_type_conversion_sweep(nvbench::state &state, nvbench::type_list) { // Optional: Skip narrowing conversions. - if (sizeof(InputType) > sizeof(OutputType)) + if constexpr(sizeof(InputType) > sizeof(OutputType)) { state.skip("Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."); return; @@ -156,6 +158,7 @@ void copy_type_conversion_sweep(nvbench::state &state, [num_values, in_ptr = thrust::raw_pointer_cast(in.data()), out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) { + (void) num_values; // clang thinks this is unused... nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr, out_ptr, num_values); diff --git a/examples/custom_criterion.cu b/examples/custom_criterion.cu new file mode 100644 index 00000000..46612355 --- /dev/null +++ b/examples/custom_criterion.cu @@ -0,0 +1,81 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +// Grab some testing kernels from NVBench: +#include + +// Thrust vectors simplify memory management: +#include + +// Inherit from the stopping_criterion_base class: +class fixed_criterion final : public nvbench::stopping_criterion_base +{ + nvbench::int64_t m_num_samples{}; + +public: + fixed_criterion() + : nvbench::stopping_criterion_base{"fixed", {{"max-samples", nvbench::int64_t{42}}}} + {} + +protected: + // Setup the criterion in the `do_initialize()` method: + virtual void do_initialize() override + { + m_num_samples = 0; + } + + // Process new measurements in the `add_measurement()` method: + virtual void do_add_measurement(nvbench::float64_t /* measurement */) override + { + m_num_samples++; + } + + // Check if the stopping criterion is met in the `is_finished()` method: + virtual bool do_is_finished() override + { + return m_num_samples >= m_params.get_int64("max-samples"); + } + +}; + +// Register the criterion with NVBench: +NVBENCH_REGISTER_CRITERION(fixed_criterion); + +void throughput_bench(nvbench::state &state) +{ + // Allocate input data: + const std::size_t num_values = 64 * 1024 * 1024 / sizeof(nvbench::int32_t); + thrust::device_vector input(num_values); + thrust::device_vector output(num_values); + + // Provide throughput information: + state.add_element_count(num_values, "NumElements"); + state.add_global_memory_reads(num_values, "DataSize"); + state.add_global_memory_writes(num_values); + + state.exec(nvbench::exec_tag::no_batch, [&input, &output, num_values](nvbench::launch &launch) { + (void) num_values; // clang thinks this is unused... + nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>( + thrust::raw_pointer_cast(input.data()), + thrust::raw_pointer_cast(output.data()), + num_values); + }); +} +NVBENCH_BENCH(throughput_bench).set_stopping_criterion("fixed"); diff --git a/examples/enums.cu b/examples/enums.cu index c14c2d48..fa149acd 100644 --- a/examples/enums.cu +++ b/examples/enums.cu @@ -91,7 +91,7 @@ NVBENCH_BENCH(runtime_enum_sweep_string) // ``` void runtime_enum_sweep_int64(nvbench::state &state) { - const auto enum_value = static_cast(state.get_int64("MyEnum")); + [[maybe_unused]] const auto enum_value = static_cast(state.get_int64("MyEnum")); // Do stuff with enum_value. // Create inputs, etc, configure runtime kernel parameters, etc. diff --git a/examples/exec_tag_sync.cu b/examples/exec_tag_sync.cu index 0ef4ee78..13669314 100644 --- a/examples/exec_tag_sync.cu +++ b/examples/exec_tag_sync.cu @@ -27,6 +27,9 @@ // Used to initialize input data: #include +// Used to run the benchmark on a CUDA stream +#include + // `sequence_bench` measures the execution time of `thrust::sequence`. Since // algorithms in `thrust::` implicitly sync the CUDA device, the // `nvbench::exec_tag::sync` must be passed to `state.exec(...)`. diff --git a/examples/exec_tag_timer.cu b/examples/exec_tag_timer.cu index 6aab8582..e283f43b 100644 --- a/examples/exec_tag_timer.cu +++ b/examples/exec_tag_timer.cu @@ -23,6 +23,7 @@ // Thrust simplifies memory management, etc: #include +#include #include #include @@ -53,6 +54,8 @@ void mod2_inplace(nvbench::state &state) state.exec(nvbench::exec_tag::timer, // Lambda now takes a `timer` argument: [&input, &data, num_values](nvbench::launch &launch, auto &timer) { + (void) num_values; // clang thinks this is unused... + // Reset working data: thrust::copy(thrust::device.on(launch.get_stream()), input.cbegin(), diff --git a/examples/stream.cu b/examples/stream.cu index 9507558d..20254e5e 100644 --- a/examples/stream.cu +++ b/examples/stream.cu @@ -52,6 +52,7 @@ void stream_bench(nvbench::state &state) state.set_cuda_stream(nvbench::make_cuda_stream_view(default_stream)); state.exec([&input, &output, num_values](nvbench::launch &) { + (void) num_values; // clang thinks this is unused... copy(thrust::raw_pointer_cast(input.data()), thrust::raw_pointer_cast(output.data()), num_values); diff --git a/examples/throughput.cu b/examples/throughput.cu index 5621ebd7..24df6ee8 100644 --- a/examples/throughput.cu +++ b/examples/throughput.cu @@ -51,6 +51,7 @@ void throughput_bench(nvbench::state &state) state.add_global_memory_writes(num_values); state.exec([&input, &output, num_values](nvbench::launch &launch) { + (void) num_values; // clang thinks this is unused... nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>( thrust::raw_pointer_cast(input.data()), thrust::raw_pointer_cast(output.data()), diff --git a/nvbench/CMakeLists.txt b/nvbench/CMakeLists.txt index f86bd415..182843c1 100644 --- a/nvbench/CMakeLists.txt +++ b/nvbench/CMakeLists.txt @@ -4,6 +4,7 @@ set(srcs benchmark_base.cxx benchmark_manager.cxx blocking_kernel.cu + criterion_manager.cxx csv_printer.cu cuda_call.cu device_info.cu @@ -17,23 +18,24 @@ set(srcs printer_multiplex.cxx runner.cxx state.cxx + stopping_criterion.cxx string_axis.cxx type_axis.cxx type_strings.cxx + detail/entropy_criterion.cxx detail/measure_cold.cu detail/measure_hot.cu detail/state_generator.cxx + detail/stdrel_criterion.cxx + + internal/nvml.cxx ) if (NVBench_ENABLE_CUPTI) list(APPEND srcs detail/measure_cupti.cu cupti_profiler.cxx) endif() -if (NVBench_ENABLE_NVML) - list(APPEND srcs internal/nvml.cxx) -endif() - # CUDA 11.0 can't compile json_printer without crashing # So for that version fall back to C++ with degraded # output ( no PTX version info ) @@ -65,7 +67,7 @@ nvbench_write_config_header(config.cuh.in ) # nvbench (nvbench::nvbench) -add_library(nvbench SHARED ${srcs}) +add_library(nvbench ${srcs}) nvbench_config_target(nvbench) target_include_directories(nvbench PUBLIC "$" @@ -78,8 +80,29 @@ target_link_libraries(nvbench PRIVATE fmt::fmt nvbench_json - nvbench_git_revision ) + +# ################################################################################################## +# * conda environment ----------------------------------------------------------------------------- +rapids_cmake_support_conda_env(conda_env MODIFY_PREFIX_PATH) +if(TARGET conda_env) + # When we are inside a conda env the linker will be set to + # `ld.bfd` which will try to resolve all undefined symbols at link time. + # + # Since we could be using a shared library version of fmt we need + # it on the final link line of consumers + target_link_libraries(nvbench PRIVATE $) + + # When we are inside a conda env the linker will be set to + # `ld.bfd` which will try to resolve all undefined symbols at link time. + # + # Since we could be using a shared library version of fmt we need + # it on the final link line of consumers + if(fmt_is_external) + target_link_libraries(nvbench PUBLIC fmt::fmt) + endif() +endif() + target_compile_features(nvbench PUBLIC cuda_std_17 PRIVATE cxx_std_17) add_dependencies(nvbench.all nvbench) @@ -107,3 +130,18 @@ if (json_is_cu) $<$:-Xcudafe=--diag_suppress=940> ) endif() + +# The call to `rapids_cmake_write_git_revision_file` must be in the same +# CMakeLists.txt as the consumer ( nvbench ) for CMake to get the dependency +# graph correct. +rapids_cmake_write_git_revision_file( + nvbench_git_revision + "${NVBench_BINARY_DIR}/nvbench/detail/git_revision.cuh" +) +target_link_libraries(nvbench PRIVATE nvbench_git_revision) + +if(NOT BUILD_SHARED_LIBS) + # Need to ensure that for static builds we export the nvbench_git_revision + # target + nvbench_install_libraries(nvbench_git_revision) +endif() diff --git a/nvbench/axes_metadata.cuh b/nvbench/axes_metadata.cuh index 353855a8..26631913 100644 --- a/nvbench/axes_metadata.cuh +++ b/nvbench/axes_metadata.cuh @@ -41,8 +41,8 @@ struct axes_metadata template explicit axes_metadata(nvbench::type_list); - axes_metadata() = default; - axes_metadata(axes_metadata &&) = default; + axes_metadata() = default; + axes_metadata(axes_metadata &&) = default; axes_metadata &operator=(axes_metadata &&) = default; axes_metadata(const axes_metadata &); @@ -58,20 +58,16 @@ struct axes_metadata void add_string_axis(std::string name, std::vector data); - [[nodiscard]] const nvbench::int64_axis & - get_int64_axis(std::string_view name) const; + [[nodiscard]] const nvbench::int64_axis &get_int64_axis(std::string_view name) const; [[nodiscard]] nvbench::int64_axis &get_int64_axis(std::string_view name); - [[nodiscard]] const nvbench::float64_axis & - get_float64_axis(std::string_view name) const; + [[nodiscard]] const nvbench::float64_axis &get_float64_axis(std::string_view name) const; [[nodiscard]] nvbench::float64_axis &get_float64_axis(std::string_view name); - [[nodiscard]] const nvbench::string_axis & - get_string_axis(std::string_view name) const; + [[nodiscard]] const nvbench::string_axis &get_string_axis(std::string_view name) const; [[nodiscard]] nvbench::string_axis &get_string_axis(std::string_view name); - [[nodiscard]] const nvbench::type_axis & - get_type_axis(std::string_view name) const; + [[nodiscard]] const nvbench::type_axis &get_type_axis(std::string_view name) const; [[nodiscard]] nvbench::type_axis &get_type_axis(std::string_view name); [[nodiscard]] const nvbench::type_axis &get_type_axis(std::size_t index) const; @@ -83,10 +79,9 @@ struct axes_metadata [[nodiscard]] const nvbench::axis_base &get_axis(std::string_view name) const; [[nodiscard]] nvbench::axis_base &get_axis(std::string_view name); - [[nodiscard]] const nvbench::axis_base & - get_axis(std::string_view name, nvbench::axis_type type) const; - [[nodiscard]] nvbench::axis_base &get_axis(std::string_view name, - nvbench::axis_type type); + [[nodiscard]] const nvbench::axis_base &get_axis(std::string_view name, + nvbench::axis_type type) const; + [[nodiscard]] nvbench::axis_base &get_axis(std::string_view name, nvbench::axis_type type); [[nodiscard]] static std::vector generate_default_type_axis_names(std::size_t num_type_axes); @@ -101,7 +96,7 @@ axes_metadata::axes_metadata(nvbench::type_list) { using type_axes_list = nvbench::type_list; constexpr auto num_type_axes = nvbench::tl::size::value; - auto names = axes_metadata::generate_default_type_axis_names(num_type_axes); + auto names = axes_metadata::generate_default_type_axis_names(num_type_axes); auto names_iter = names.begin(); // contents will be moved from nvbench::tl::foreach( @@ -114,8 +109,7 @@ axes_metadata::axes_metadata(nvbench::type_list) // The word "type" appears 6 times in the next line. // Every. Single. Token. typedef typename decltype(wrapped_type)::type type_list; - auto axis = std::make_unique(std::move(*names_iter++), - type_axis_index); + auto axis = std::make_unique(std::move(*names_iter++), type_axis_index); axis->template set_inputs(); axes.push_back(std::move(axis)); }); diff --git a/nvbench/axes_metadata.cxx b/nvbench/axes_metadata.cxx index 044bc91f..ef51a964 100644 --- a/nvbench/axes_metadata.cxx +++ b/nvbench/axes_metadata.cxx @@ -64,9 +64,7 @@ try auto &axis = *m_axes[i]; if (axis.get_type() != nvbench::axis_type::type) { - NVBENCH_THROW(std::runtime_error, - "Number of names exceeds number of type axes ({})", - i); + NVBENCH_THROW(std::runtime_error, "Number of names exceeds number of type axes ({})", i); } axis.set_name(std::move(names[i])); @@ -81,8 +79,7 @@ catch (std::exception &e) names); } -void axes_metadata::add_float64_axis(std::string name, - std::vector data) +void axes_metadata::add_float64_axis(std::string name, std::vector data) { auto axis = std::make_unique(std::move(name)); axis->set_inputs(std::move(data)); @@ -98,8 +95,7 @@ void axes_metadata::add_int64_axis(std::string name, m_axes.push_back(std::move(axis)); } -void axes_metadata::add_string_axis(std::string name, - std::vector data) +void axes_metadata::add_string_axis(std::string name, std::vector data) { auto axis = std::make_unique(std::move(name)); axis->set_inputs(std::move(data)); @@ -188,10 +184,9 @@ nvbench::type_axis &axes_metadata::get_type_axis(std::size_t index) const axis_base &axes_metadata::get_axis(std::string_view name) const { - auto iter = - std::find_if(m_axes.cbegin(), m_axes.cend(), [&name](const auto &axis) { - return axis->get_name() == name; - }); + auto iter = std::find_if(m_axes.cbegin(), m_axes.cend(), [&name](const auto &axis) { + return axis->get_name() == name; + }); if (iter == m_axes.cend()) { @@ -203,10 +198,9 @@ const axis_base &axes_metadata::get_axis(std::string_view name) const axis_base &axes_metadata::get_axis(std::string_view name) { - auto iter = - std::find_if(m_axes.begin(), m_axes.end(), [&name](const auto &axis) { - return axis->get_name() == name; - }); + auto iter = std::find_if(m_axes.begin(), m_axes.end(), [&name](const auto &axis) { + return axis->get_name() == name; + }); if (iter == m_axes.end()) { @@ -216,8 +210,7 @@ axis_base &axes_metadata::get_axis(std::string_view name) return **iter; } -const axis_base &axes_metadata::get_axis(std::string_view name, - nvbench::axis_type type) const +const axis_base &axes_metadata::get_axis(std::string_view name, nvbench::axis_type type) const { const auto &axis = this->get_axis(name); if (axis.get_type() != type) @@ -231,8 +224,7 @@ const axis_base &axes_metadata::get_axis(std::string_view name, return axis; } -axis_base &axes_metadata::get_axis(std::string_view name, - nvbench::axis_type type) +axis_base &axes_metadata::get_axis(std::string_view name, nvbench::axis_type type) { auto &axis = this->get_axis(name); if (axis.get_type() != type) @@ -246,8 +238,7 @@ axis_base &axes_metadata::get_axis(std::string_view name, return axis; } -std::vector -axes_metadata::generate_default_type_axis_names(std::size_t num_type_axes) +std::vector axes_metadata::generate_default_type_axis_names(std::size_t num_type_axes) { switch (num_type_axes) { diff --git a/nvbench/axis_base.cuh b/nvbench/axis_base.cuh index 712172f4..0760f702 100644 --- a/nvbench/axis_base.cuh +++ b/nvbench/axis_base.cuh @@ -21,6 +21,7 @@ #include #include #include +#include #include namespace nvbench @@ -47,10 +48,7 @@ struct axis_base [[nodiscard]] axis_type get_type() const { return m_type; } - [[nodiscard]] std::string_view get_type_as_string() const - { - return axis_type_to_string(m_type); - } + [[nodiscard]] std::string_view get_type_as_string() const { return axis_type_to_string(m_type); } [[nodiscard]] std::string_view get_flags_as_string() const { @@ -93,16 +91,12 @@ inline std::string_view axis_type_to_string(axis_type type) { case axis_type::type: return "type"; - break; case axis_type::int64: return "int64"; - break; case axis_type::float64: return "float64"; - break; case axis_type::string: return "string"; - break; } throw std::runtime_error{"nvbench::axis_type_to_string Invalid axis_type."}; } diff --git a/nvbench/axis_base.cxx b/nvbench/axis_base.cxx index 6d0bd4df..166f1bae 100644 --- a/nvbench/axis_base.cxx +++ b/nvbench/axis_base.cxx @@ -23,9 +23,6 @@ namespace nvbench axis_base::~axis_base() = default; -std::unique_ptr axis_base::clone() const -{ - return this->do_clone(); -} +std::unique_ptr axis_base::clone() const { return this->do_clone(); } } // namespace nvbench diff --git a/nvbench/benchmark.cuh b/nvbench/benchmark.cuh index 5e050d1c..a226070b 100644 --- a/nvbench/benchmark.cuh +++ b/nvbench/benchmark.cuh @@ -57,18 +57,14 @@ struct benchmark final : public benchmark_base using type_axes = TypeAxes; using type_configs = nvbench::tl::cartesian_product; - static constexpr std::size_t num_type_configs = - nvbench::tl::size{}; + static constexpr std::size_t num_type_configs = nvbench::tl::size{}; benchmark() : benchmark_base(type_axes{}) {} private: - std::unique_ptr do_clone() const final - { - return std::make_unique(); - } + std::unique_ptr do_clone() const final { return std::make_unique(); } void do_set_type_axes_names(std::vector names) final { diff --git a/nvbench/benchmark_base.cuh b/nvbench/benchmark_base.cuh index 3a16408c..170b942a 100644 --- a/nvbench/benchmark_base.cuh +++ b/nvbench/benchmark_base.cuh @@ -20,8 +20,8 @@ #include #include -#include #include +#include #include // reference_wrapper, ref #include @@ -52,7 +52,6 @@ struct benchmark_base template explicit benchmark_base(TypeAxes type_axes) : m_axes(type_axes) - , m_devices(nvbench::device_manager::get().get_devices()) {} virtual ~benchmark_base(); @@ -80,32 +79,28 @@ struct benchmark_base return *this; } - benchmark_base &add_float64_axis(std::string name, - std::vector data) + benchmark_base &add_float64_axis(std::string name, std::vector data) { m_axes.add_float64_axis(std::move(name), std::move(data)); return *this; } - benchmark_base &add_int64_axis( - std::string name, - std::vector data, - nvbench::int64_axis_flags flags = nvbench::int64_axis_flags::none) + benchmark_base &add_int64_axis(std::string name, + std::vector data, + nvbench::int64_axis_flags flags = nvbench::int64_axis_flags::none) { m_axes.add_int64_axis(std::move(name), std::move(data), flags); return *this; } - benchmark_base &add_int64_power_of_two_axis(std::string name, - std::vector data) + benchmark_base &add_int64_power_of_two_axis(std::string name, std::vector data) { return this->add_int64_axis(std::move(name), std::move(data), nvbench::int64_axis_flags::power_of_two); } - benchmark_base &add_string_axis(std::string name, - std::vector data) + benchmark_base &add_string_axis(std::string name, std::vector data) { m_axes.add_string_axis(std::move(name), std::move(data)); return *this; @@ -133,48 +128,30 @@ struct benchmark_base return *this; } - [[nodiscard]] const std::vector &get_devices() const - { - return m_devices; - } + [[nodiscard]] const std::vector &get_devices() const { return m_devices; } [[nodiscard]] nvbench::axes_metadata &get_axes() { return m_axes; } - [[nodiscard]] const nvbench::axes_metadata &get_axes() const - { - return m_axes; - } + [[nodiscard]] const nvbench::axes_metadata &get_axes() const { return m_axes; } // Computes the number of configs in the benchmark. // Unlike get_states().size(), this method may be used prior to calling run(). [[nodiscard]] std::size_t get_config_count() const; // Is empty until run() is called. - [[nodiscard]] const std::vector &get_states() const - { - return m_states; - } + [[nodiscard]] const std::vector &get_states() const { return m_states; } [[nodiscard]] std::vector &get_states() { return m_states; } void run() { this->do_run(); } - void set_printer(nvbench::printer_base &printer) - { - m_printer = std::ref(printer); - } + void set_printer(nvbench::printer_base &printer) { m_printer = std::ref(printer); } void clear_printer() { m_printer = std::nullopt; } - [[nodiscard]] optional_ref get_printer() const - { - return m_printer; - } + [[nodiscard]] optional_ref get_printer() const { return m_printer; } /// Execute at least this many trials per measurement. @{ - [[nodiscard]] nvbench::int64_t get_min_samples() const - { - return m_min_samples; - } + [[nodiscard]] nvbench::int64_t get_min_samples() const { return m_min_samples; } benchmark_base &set_min_samples(nvbench::int64_t min_samples) { m_min_samples = min_samples; @@ -193,7 +170,7 @@ struct benchmark_base } /// @} - /// If true, the benchmark does not use the blocking_kernel. This is intended + /// If true, the benchmark does not use the blocking_kernel. This is intended /// for use with external profiling tools. @{ [[nodiscard]] bool get_disable_blocking_kernel() const { return m_disable_blocking_kernel; } benchmark_base &set_disable_blocking_kernel(bool v) @@ -203,22 +180,30 @@ struct benchmark_base } /// @} - /// Accumulate at least this many seconds of timing data per measurement. @{ - [[nodiscard]] nvbench::float64_t get_min_time() const { return m_min_time; } + /// Accumulate at least this many seconds of timing data per measurement. + /// Only applies to `stdrel` stopping criterion. @{ + [[nodiscard]] nvbench::float64_t get_min_time() const + { + return m_criterion_params.get_float64("min-time"); + } benchmark_base &set_min_time(nvbench::float64_t min_time) { - m_min_time = min_time; + m_criterion_params.set_float64("min-time", min_time); return *this; } /// @} /// Specify the maximum amount of noise if a measurement supports noise. /// Noise is the relative standard deviation: - /// `noise = stdev / mean_time`. @{ - [[nodiscard]] nvbench::float64_t get_max_noise() const { return m_max_noise; } + /// `noise = stdev / mean_time`. + /// Only applies to `stdrel` stopping criterion. @{ + [[nodiscard]] nvbench::float64_t get_max_noise() const + { + return m_criterion_params.get_float64("max-noise"); + } benchmark_base &set_max_noise(nvbench::float64_t max_noise) { - m_max_noise = max_noise; + m_criterion_params.set_float64("max-noise", max_noise); return *this; } /// @} @@ -252,6 +237,19 @@ struct benchmark_base } /// @} + [[nodiscard]] nvbench::criterion_params& get_criterion_params() { return m_criterion_params; } + [[nodiscard]] const nvbench::criterion_params& get_criterion_params() const { return m_criterion_params; } + + /// Control the stopping criterion for the measurement loop. + /// @{ + [[nodiscard]] const std::string& get_stopping_criterion() const { return m_stopping_criterion; } + benchmark_base &set_stopping_criterion(std::string criterion) + { + m_stopping_criterion = std::move(criterion); + return *this; + } + /// @} + protected: friend struct nvbench::runner_base; @@ -269,12 +267,13 @@ protected: bool m_disable_blocking_kernel{false}; nvbench::int64_t m_min_samples{10}; - nvbench::float64_t m_min_time{0.5}; - nvbench::float64_t m_max_noise{0.005}; // 0.5% relative standard deviation nvbench::float64_t m_skip_time{-1.}; nvbench::float64_t m_timeout{15.}; + nvbench::criterion_params m_criterion_params; + std::string m_stopping_criterion{"stdrel"}; + private: // route these through virtuals so the templated subclass can inject type info virtual std::unique_ptr do_clone() const = 0; diff --git a/nvbench/benchmark_base.cxx b/nvbench/benchmark_base.cxx index 2d08fdbd..6e89fd3d 100644 --- a/nvbench/benchmark_base.cxx +++ b/nvbench/benchmark_base.cxx @@ -34,13 +34,14 @@ std::unique_ptr benchmark_base::clone() const result->m_axes = m_axes; result->m_devices = m_devices; - result->m_min_samples = m_min_samples; - result->m_min_time = m_min_time; - result->m_max_noise = m_max_noise; + result->m_min_samples = m_min_samples; + result->m_criterion_params = m_criterion_params; result->m_skip_time = m_skip_time; result->m_timeout = m_timeout; + result->m_stopping_criterion = m_stopping_criterion; + return result; } @@ -68,8 +69,7 @@ std::size_t benchmark_base::get_config_count() const std::size_t{1}, std::multiplies<>{}, [](const auto &axis_ptr) { - if (const auto *type_axis_ptr = - dynamic_cast(axis_ptr.get()); + if (const auto *type_axis_ptr = dynamic_cast(axis_ptr.get()); type_axis_ptr != nullptr) { return type_axis_ptr->get_active_count(); diff --git a/nvbench/benchmark_manager.cuh b/nvbench/benchmark_manager.cuh index 39b1717c..51fab18e 100644 --- a/nvbench/benchmark_manager.cuh +++ b/nvbench/benchmark_manager.cuh @@ -31,14 +31,22 @@ namespace nvbench */ struct benchmark_manager { - using benchmark_vector = - std::vector>; + using benchmark_vector = std::vector>; /** * @return The singleton benchmark_manager instance. */ [[nodiscard]] static benchmark_manager &get(); + /** + * Setup any default values for the benchmarks. Invoked from `main`. + * + * Specifically, any CUDA calls (e.g. cudaGetDeviceProperties, etc) needed to initialize the + * benchmarks should be done here to avoid creating a CUDA context before we configure the CUDA + * environment in `main`. + */ + void initialize(); + /** * Register a new benchmark. */ @@ -53,25 +61,21 @@ struct benchmark_manager * Get a non-mutable reference to benchmark with the specified name/index. * @{ */ - [[nodiscard]] const benchmark_base & - get_benchmark(const std::string &name) const; + [[nodiscard]] const benchmark_base &get_benchmark(const std::string &name) const; [[nodiscard]] const benchmark_base &get_benchmark(std::size_t idx) const { return *m_benchmarks.at(idx); } /**@}*/ - [[nodiscard]] const benchmark_vector &get_benchmarks() const - { - return m_benchmarks; - }; + [[nodiscard]] const benchmark_vector &get_benchmarks() const { return m_benchmarks; }; private: - benchmark_manager() = default; - benchmark_manager(const benchmark_manager &) = delete; - benchmark_manager(benchmark_manager &&) = delete; + benchmark_manager() = default; + benchmark_manager(const benchmark_manager &) = delete; + benchmark_manager(benchmark_manager &&) = delete; benchmark_manager &operator=(const benchmark_manager &) = delete; - benchmark_manager &operator=(benchmark_manager &&) = delete; + benchmark_manager &operator=(benchmark_manager &&) = delete; benchmark_vector m_benchmarks; }; diff --git a/nvbench/benchmark_manager.cxx b/nvbench/benchmark_manager.cxx index 2a0ca603..5df702db 100644 --- a/nvbench/benchmark_manager.cxx +++ b/nvbench/benchmark_manager.cxx @@ -18,6 +18,7 @@ #include +#include #include #include @@ -34,6 +35,15 @@ benchmark_manager &benchmark_manager::get() return the_manager; } +void benchmark_manager::initialize() +{ + const auto& mgr = device_manager::get(); + for (auto& bench : m_benchmarks) + { + bench->set_devices(mgr.get_devices()); + } +} + benchmark_base &benchmark_manager::add(std::unique_ptr bench) { m_benchmarks.push_back(std::move(bench)); @@ -43,21 +53,18 @@ benchmark_base &benchmark_manager::add(std::unique_ptr bench) benchmark_manager::benchmark_vector benchmark_manager::clone_benchmarks() const { benchmark_vector result(m_benchmarks.size()); - std::transform(m_benchmarks.cbegin(), - m_benchmarks.cend(), - result.begin(), - [](const auto &bench) { return bench->clone(); }); + std::transform(m_benchmarks.cbegin(), m_benchmarks.cend(), result.begin(), [](const auto &bench) { + return bench->clone(); + }); return result; } -const benchmark_base & -benchmark_manager::get_benchmark(const std::string &name) const +const benchmark_base &benchmark_manager::get_benchmark(const std::string &name) const { - auto iter = std::find_if(m_benchmarks.cbegin(), - m_benchmarks.cend(), - [&name](const auto &bench_ptr) { - return bench_ptr->get_name() == name; - }); + auto iter = + std::find_if(m_benchmarks.cbegin(), m_benchmarks.cend(), [&name](const auto &bench_ptr) { + return bench_ptr->get_name() == name; + }); if (iter == m_benchmarks.cend()) { NVBENCH_THROW(std::out_of_range, "No benchmark named '{}'.", name); diff --git a/nvbench/blocking_kernel.cu b/nvbench/blocking_kernel.cu index 1ee5855c..f3478331 100644 --- a/nvbench/blocking_kernel.cu +++ b/nvbench/blocking_kernel.cu @@ -42,8 +42,8 @@ __global__ void block_stream(const volatile nvbench::int32_t *flag, nvbench::float64_t timeout) { const auto start_point = cuda::std::chrono::high_resolution_clock::now(); - const auto timeout_ns = cuda::std::chrono::nanoseconds( - static_cast(timeout * 1e9)); + const auto timeout_ns = + cuda::std::chrono::nanoseconds(static_cast(timeout * 1e9)); const auto timeout_point = start_point + timeout_ns; const bool use_timeout = timeout >= 0.; @@ -57,41 +57,40 @@ __global__ void block_stream(const volatile nvbench::int32_t *flag, { *timeout_flag = 1; __threadfence_system(); // Ensure timeout flag visibility on host. - printf( - "\n" - "######################################################################\n" - "##################### Possible Deadlock Detected #####################\n" - "######################################################################\n" - "\n" - "Forcing unblock: The current measurement appears to have deadlocked\n" - "and the results cannot be trusted.\n" - "\n" - "This happens when the KernelLauncher synchronizes the CUDA device.\n" - "If this is the case, pass the `sync` exec_tag to the `exec` call:\n" - "\n" - " state.exec(); // Deadlock\n" - " state.exec(nvbench::exec_tag::sync, ); // Safe\n" - "\n" - "This tells NVBench about the sync so it can run the benchmark safely.\n" - "\n" - "If the KernelLauncher does not synchronize but has a very long \n" - "execution time, this may be a false positive. If so, disable this\n" - "check with:\n" - "\n" - " state.set_blocking_kernel_timeout(-1);\n" - "\n" - "The current timeout is set to %0.5g seconds.\n" - "\n" - "For more information, see the 'Benchmarks that sync' section of the\n" - "NVBench documentation.\n" - "\n" - "If this happens while profiling with an external tool,\n" - "pass the `--disable-blocking-kernel` flag or the `--profile` flag\n" - "(to also only run the benchmark once) to the executable.\n" - "\n" - "For more information, see the 'Benchmark Properties' section of the\n" - "NVBench documentation.\n\n", - timeout); + printf("\n" + "######################################################################\n" + "##################### Possible Deadlock Detected #####################\n" + "######################################################################\n" + "\n" + "Forcing unblock: The current measurement appears to have deadlocked\n" + "and the results cannot be trusted.\n" + "\n" + "This happens when the KernelLauncher synchronizes the CUDA device.\n" + "If this is the case, pass the `sync` exec_tag to the `exec` call:\n" + "\n" + " state.exec(); // Deadlock\n" + " state.exec(nvbench::exec_tag::sync, ); // Safe\n" + "\n" + "This tells NVBench about the sync so it can run the benchmark safely.\n" + "\n" + "If the KernelLauncher does not synchronize but has a very long \n" + "execution time, this may be a false positive. If so, disable this\n" + "check with:\n" + "\n" + " state.set_blocking_kernel_timeout(-1);\n" + "\n" + "The current timeout is set to %0.5g seconds.\n" + "\n" + "For more information, see the 'Benchmarks that sync' section of the\n" + "NVBench documentation.\n" + "\n" + "If this happens while profiling with an external tool,\n" + "pass the `--disable-blocking-kernel` flag or the `--profile` flag\n" + "(to also only run the benchmark once) to the executable.\n" + "\n" + "For more information, see the 'Benchmark Properties' section of the\n" + "NVBench documentation.\n\n", + timeout); } } @@ -102,15 +101,11 @@ namespace nvbench blocking_kernel::blocking_kernel() { - NVBENCH_CUDA_CALL(cudaHostRegister(&m_host_flag, - sizeof(m_host_flag), - cudaHostRegisterMapped)); + NVBENCH_CUDA_CALL(cudaHostRegister(&m_host_flag, sizeof(m_host_flag), cudaHostRegisterMapped)); NVBENCH_CUDA_CALL(cudaHostGetDevicePointer(&m_device_flag, &m_host_flag, 0)); - NVBENCH_CUDA_CALL(cudaHostRegister(&m_host_timeout_flag, - sizeof(m_host_timeout_flag), - cudaHostRegisterMapped)); NVBENCH_CUDA_CALL( - cudaHostGetDevicePointer(&m_device_timeout_flag, &m_host_timeout_flag, 0)); + cudaHostRegister(&m_host_timeout_flag, sizeof(m_host_timeout_flag), cudaHostRegisterMapped)); + NVBENCH_CUDA_CALL(cudaHostGetDevicePointer(&m_device_timeout_flag, &m_host_timeout_flag, 0)); } blocking_kernel::~blocking_kernel() @@ -119,14 +114,11 @@ blocking_kernel::~blocking_kernel() NVBENCH_CUDA_CALL_NOEXCEPT(cudaHostUnregister(&m_host_timeout_flag)); } -void blocking_kernel::block(const nvbench::cuda_stream &stream, - nvbench::float64_t timeout) +void blocking_kernel::block(const nvbench::cuda_stream &stream, nvbench::float64_t timeout) { m_host_flag = 0; m_host_timeout_flag = 0; - block_stream<<<1, 1, 0, stream>>>(m_device_flag, - m_device_timeout_flag, - timeout); + block_stream<<<1, 1, 0, stream>>>(m_device_flag, m_device_timeout_flag, timeout); } void blocking_kernel::timeout_detected() diff --git a/nvbench/blocking_kernel.cuh b/nvbench/blocking_kernel.cuh index ecbfed8f..13f737ef 100644 --- a/nvbench/blocking_kernel.cuh +++ b/nvbench/blocking_kernel.cuh @@ -97,10 +97,10 @@ struct blocking_kernel } // move-only - blocking_kernel(const blocking_kernel &) = delete; - blocking_kernel(blocking_kernel &&) = default; + blocking_kernel(const blocking_kernel &) = delete; + blocking_kernel(blocking_kernel &&) = default; blocking_kernel &operator=(const blocking_kernel &) = delete; - blocking_kernel &operator=(blocking_kernel &&) = default; + blocking_kernel &operator=(blocking_kernel &&) = default; private: nvbench::int32_t m_host_flag{}; diff --git a/nvbench/callable.cuh b/nvbench/callable.cuh index ce7fff02..2cd1f15f 100644 --- a/nvbench/callable.cuh +++ b/nvbench/callable.cuh @@ -30,35 +30,29 @@ struct state; // Define a simple callable wrapper around a function. This allows the function // to be used as a class template parameter. Intended for use with kernel // generators and `NVBENCH_BENCH` macros. -#define NVBENCH_DEFINE_UNIQUE_CALLABLE(function) \ +#define NVBENCH_DEFINE_UNIQUE_CALLABLE(function) \ NVBENCH_DEFINE_CALLABLE(function, NVBENCH_UNIQUE_IDENTIFIER(function)) -#define NVBENCH_DEFINE_CALLABLE(function, callable_name) \ - struct callable_name \ - { \ - void operator()(nvbench::state &state, nvbench::type_list<>) \ - { \ - function(state); \ - } \ +#define NVBENCH_DEFINE_CALLABLE(function, callable_name) \ + struct callable_name \ + { \ + void operator()(nvbench::state &state, nvbench::type_list<>) { function(state); } \ } -#define NVBENCH_DEFINE_UNIQUE_CALLABLE_TEMPLATE(function) \ - NVBENCH_DEFINE_CALLABLE_TEMPLATE(function, \ - NVBENCH_UNIQUE_IDENTIFIER(function)) - -#define NVBENCH_DEFINE_CALLABLE_TEMPLATE(function, callable_name) \ - struct callable_name \ - { \ - template \ - void operator()(nvbench::state &state, nvbench::type_list) \ - { \ - function(state, nvbench::type_list{}); \ - } \ +#define NVBENCH_DEFINE_UNIQUE_CALLABLE_TEMPLATE(function) \ + NVBENCH_DEFINE_CALLABLE_TEMPLATE(function, NVBENCH_UNIQUE_IDENTIFIER(function)) + +#define NVBENCH_DEFINE_CALLABLE_TEMPLATE(function, callable_name) \ + struct callable_name \ + { \ + template \ + void operator()(nvbench::state &state, nvbench::type_list) \ + { \ + function(state, nvbench::type_list{}); \ + } \ } -#define NVBENCH_UNIQUE_IDENTIFIER(prefix) \ - NVBENCH_UNIQUE_IDENTIFIER_IMPL1(prefix, __LINE__) -#define NVBENCH_UNIQUE_IDENTIFIER_IMPL1(prefix, unique_id) \ +#define NVBENCH_UNIQUE_IDENTIFIER(prefix) NVBENCH_UNIQUE_IDENTIFIER_IMPL1(prefix, __LINE__) +#define NVBENCH_UNIQUE_IDENTIFIER_IMPL1(prefix, unique_id) \ NVBENCH_UNIQUE_IDENTIFIER_IMPL2(prefix, unique_id) -#define NVBENCH_UNIQUE_IDENTIFIER_IMPL2(prefix, unique_id) \ - prefix##_line_##unique_id +#define NVBENCH_UNIQUE_IDENTIFIER_IMPL2(prefix, unique_id) prefix##_line_##unique_id diff --git a/nvbench/cpu_timer.cuh b/nvbench/cpu_timer.cuh index 09d3c54e..d4ba6553 100644 --- a/nvbench/cpu_timer.cuh +++ b/nvbench/cpu_timer.cuh @@ -30,27 +30,20 @@ struct cpu_timer __forceinline__ cpu_timer() = default; // move-only - cpu_timer(const cpu_timer &) = delete; - cpu_timer(cpu_timer &&) = default; + cpu_timer(const cpu_timer &) = delete; + cpu_timer(cpu_timer &&) = default; cpu_timer &operator=(const cpu_timer &) = delete; - cpu_timer &operator=(cpu_timer &&) = default; + cpu_timer &operator=(cpu_timer &&) = default; - __forceinline__ void start() - { - m_start = std::chrono::high_resolution_clock::now(); - } + __forceinline__ void start() { m_start = std::chrono::high_resolution_clock::now(); } - __forceinline__ void stop() - { - m_stop = std::chrono::high_resolution_clock::now(); - } + __forceinline__ void stop() { m_stop = std::chrono::high_resolution_clock::now(); } // In seconds: [[nodiscard]] __forceinline__ nvbench::float64_t get_duration() { const auto duration = m_stop - m_start; - const auto ns = - std::chrono::duration_cast(duration).count(); + const auto ns = std::chrono::duration_cast(duration).count(); return static_cast(ns) * (1e-9); } diff --git a/nvbench/create.cuh b/nvbench/create.cuh index 7aed1b7e..902d6c38 100644 --- a/nvbench/create.cuh +++ b/nvbench/create.cuh @@ -27,19 +27,17 @@ #define NVBENCH_TYPE_AXES(...) nvbench::type_list<__VA_ARGS__> -#define NVBENCH_BENCH(KernelGenerator) \ - NVBENCH_DEFINE_UNIQUE_CALLABLE(KernelGenerator); \ - nvbench::benchmark_base &NVBENCH_UNIQUE_IDENTIFIER(obj_##KernelGenerator) = \ - nvbench::benchmark_manager::get() \ - .add(std::make_unique< \ - nvbench::benchmark>()) \ +#define NVBENCH_BENCH(KernelGenerator) \ + NVBENCH_DEFINE_UNIQUE_CALLABLE(KernelGenerator); \ + nvbench::benchmark_base &NVBENCH_UNIQUE_IDENTIFIER(obj_##KernelGenerator) = \ + nvbench::benchmark_manager::get() \ + .add(std::make_unique>()) \ .set_name(#KernelGenerator) -#define NVBENCH_BENCH_TYPES(KernelGenerator, TypeAxes) \ - NVBENCH_DEFINE_UNIQUE_CALLABLE_TEMPLATE(KernelGenerator); \ - nvbench::benchmark_base &NVBENCH_UNIQUE_IDENTIFIER(obj_##KernelGenerator) = \ - nvbench::benchmark_manager::get() \ - .add(std::make_unique< \ - nvbench::benchmark>()) \ +#define NVBENCH_BENCH_TYPES(KernelGenerator, TypeAxes) \ + NVBENCH_DEFINE_UNIQUE_CALLABLE_TEMPLATE(KernelGenerator); \ + nvbench::benchmark_base &NVBENCH_UNIQUE_IDENTIFIER(obj_##KernelGenerator) = \ + nvbench::benchmark_manager::get() \ + .add(std::make_unique< \ + nvbench::benchmark>()) \ .set_name(#KernelGenerator) diff --git a/nvbench/criterion_manager.cuh b/nvbench/criterion_manager.cuh new file mode 100644 index 00000000..6c60993c --- /dev/null +++ b/nvbench/criterion_manager.cuh @@ -0,0 +1,65 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include + +#include + +namespace nvbench +{ + +class criterion_manager +{ + std::unordered_map> m_map; + + criterion_manager(); + +public: + /** + * @return The singleton criterion_manager instance. + */ + static criterion_manager& get(); + + /** + * Register a new stopping criterion. + */ + nvbench::stopping_criterion_base& add(std::unique_ptr criterion); + nvbench::stopping_criterion_base& get_criterion(const std::string& name); + const nvbench::stopping_criterion_base& get_criterion(const std::string& name) const; + + using params_description = std::vector>; + params_description get_params_description() const; +}; + +/** + * Given a stopping criterion type `TYPE`, registers it in the criterion manager + * + * See the `custom_criterion.cu` example for usage. + */ +#define NVBENCH_REGISTER_CRITERION(TYPE) \ + static nvbench::stopping_criterion_base &NVBENCH_UNIQUE_IDENTIFIER(TYPE) = \ + nvbench::criterion_manager::get().add(std::make_unique()) + +} // namespace nvbench diff --git a/nvbench/criterion_manager.cxx b/nvbench/criterion_manager.cxx new file mode 100644 index 00000000..f4857e9e --- /dev/null +++ b/nvbench/criterion_manager.cxx @@ -0,0 +1,107 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace nvbench +{ + +criterion_manager::criterion_manager() +{ + m_map.emplace("stdrel", std::make_unique()); + m_map.emplace("entropy", std::make_unique()); +} + +criterion_manager &criterion_manager::get() +{ + static criterion_manager registry; + return registry; +} + +stopping_criterion_base& criterion_manager::get_criterion(const std::string& name) +{ + auto iter = m_map.find(name); + if (iter == m_map.end()) + { + NVBENCH_THROW(std::runtime_error, "No stopping criterion named \"{}\".", name); + } + return *iter->second.get(); +} + +const nvbench::stopping_criterion_base& criterion_manager::get_criterion(const std::string& name) const +{ + auto iter = m_map.find(name); + if (iter == m_map.end()) + { + NVBENCH_THROW(std::runtime_error, "No stopping criterion named \"{}\".", name); + } + return *iter->second.get(); +} + +stopping_criterion_base &criterion_manager::add(std::unique_ptr criterion) +{ + const std::string name = criterion->get_name(); + + auto [it, success] = m_map.emplace(name, std::move(criterion)); + + if (!success) + { + NVBENCH_THROW(std::runtime_error, + "Stopping criterion \"{}\" is already registered.", name); + } + + return *it->second.get(); +} + +nvbench::criterion_manager::params_description criterion_manager::get_params_description() const +{ + nvbench::criterion_manager::params_description desc; + + for (auto &[criterion_name, criterion] : m_map) + { + nvbench::criterion_params params = criterion->get_params(); + + for (auto param : params.get_names()) + { + nvbench::named_values::type type = params.get_type(param); + if (std::find_if(desc.begin(), desc.end(), [&](auto d) { + return d.first == param && d.second != type; + }) != desc.end()) + { + NVBENCH_THROW(std::runtime_error, + "Stopping criterion \"{}\" parameter \"{}\" is already used by another " + "criterion with a different type.", + criterion_name, + param); + } + desc.emplace_back(param, type); + } + } + + return desc; +} + +} // namespace nvbench diff --git a/nvbench/csv_printer.cu b/nvbench/csv_printer.cu index 6acb535e..a0a906ef 100644 --- a/nvbench/csv_printer.cu +++ b/nvbench/csv_printer.cu @@ -66,10 +66,8 @@ void csv_printer::do_print_benchmark_results(const benchmark_vector &benches) { std::optional device = cur_state.get_device(); - std::string device_id = device ? fmt::to_string(device->get_id()) - : std::string{}; - std::string device_name = device ? std::string{device->get_name()} - : std::string{}; + std::string device_id = device ? fmt::to_string(device->get_id()) : std::string{}; + std::string device_name = device ? std::string{device->get_name()} : std::string{}; table.add_cell(row, "_bench_name", "Benchmark", bench_name); table.add_cell(row, "_device_id", "Device", std::move(device_id)); @@ -88,15 +86,11 @@ void csv_printer::do_print_benchmark_results(const benchmark_vector &benches) name + "_axis_pow2_pretty", name + " (pow2)", fmt::format("2^{}", exponent)); - table.add_cell(row, - name + "_axis_plain", - fmt::format("{}", name), - fmt::to_string(value)); + table.add_cell(row, name + "_axis_plain", fmt::format("{}", name), fmt::to_string(value)); } else { - std::string value = std::visit(format_visitor, - axis_values.get_value(name)); + std::string value = std::visit(format_visitor, axis_values.get_value(name)); table.add_cell(row, name + "_axis", name, std::move(value)); } } @@ -117,14 +111,10 @@ void csv_printer::do_print_benchmark_results(const benchmark_vector &benches) continue; } const std::string &tag = summ.get_tag(); - const std::string &header = summ.has_value("name") - ? summ.get_string("name") - : tag; - - const std::string hint = summ.has_value("hint") - ? summ.get_string("hint") - : std::string{}; - std::string value = std::visit(format_visitor, summ.get_value("value")); + const std::string &header = summ.has_value("name") ? summ.get_string("name") : tag; + + const std::string hint = summ.has_value("hint") ? summ.get_string("hint") : std::string{}; + std::string value = std::visit(format_visitor, summ.get_value("value")); if (hint == "duration") { table.add_cell(row, tag, header + " (sec)", std::move(value)); @@ -171,9 +161,9 @@ void csv_printer::do_print_benchmark_results(const benchmark_vector &benches) std::size_t remaining = table.m_columns.size(); for (const auto &col : table.m_columns) { - fmt::format_to(buffer, "{}{}", col.header, (--remaining == 0) ? "" : ","); + fmt::format_to(std::back_inserter(buffer), "{}{}", col.header, (--remaining == 0) ? "" : ","); } - fmt::format_to(buffer, "\n"); + fmt::format_to(std::back_inserter(buffer), "\n"); } { // Rows @@ -182,12 +172,9 @@ void csv_printer::do_print_benchmark_results(const benchmark_vector &benches) std::size_t remaining = table.m_columns.size(); for (const auto &col : table.m_columns) { - fmt::format_to(buffer, - "{}{}", - col.rows[i], - (--remaining == 0) ? "" : ","); + fmt::format_to(std::back_inserter(buffer), "{}{}", col.rows[i], (--remaining == 0) ? "" : ","); } - fmt::format_to(buffer, "\n"); + fmt::format_to(std::back_inserter(buffer), "\n"); } } diff --git a/nvbench/cuda_call.cu b/nvbench/cuda_call.cu index 6cb304be..662c7593 100644 --- a/nvbench/cuda_call.cu +++ b/nvbench/cuda_call.cu @@ -16,8 +16,8 @@ * limitations under the License. */ -#include #include +#include #include @@ -66,11 +66,7 @@ void throw_error(const std::string &filename, command)); } #else -void throw_error(const std::string &, - std::size_t, - const std::string &, - CUresult) -{} +void throw_error(const std::string &, std::size_t, const std::string &, CUresult) {} #endif void exit_error(const std::string &filename, diff --git a/nvbench/cuda_call.cuh b/nvbench/cuda_call.cuh index f1d6c45b..5b2ae362 100644 --- a/nvbench/cuda_call.cuh +++ b/nvbench/cuda_call.cuh @@ -18,52 +18,45 @@ #pragma once -#include #include +#include #include /// Throws a std::runtime_error if `call` doesn't return `cudaSuccess`. -#define NVBENCH_CUDA_CALL(call) \ - do \ - { \ - const cudaError_t nvbench_cuda_call_error = call; \ - if (nvbench_cuda_call_error != cudaSuccess) \ - { \ - nvbench::cuda_call::throw_error(__FILE__, \ - __LINE__, \ - #call, \ - nvbench_cuda_call_error); \ - } \ +/// Resets the error with cudaGetLastError(). +#define NVBENCH_CUDA_CALL(call) \ + do \ + { \ + const cudaError_t nvbench_cuda_call_error = call; \ + if (nvbench_cuda_call_error != cudaSuccess) \ + { \ + cudaGetLastError(); \ + nvbench::cuda_call::throw_error(__FILE__, __LINE__, #call, nvbench_cuda_call_error); \ + } \ } while (false) /// Throws a std::runtime_error if `call` doesn't return `CUDA_SUCCESS`. -#define NVBENCH_DRIVER_API_CALL(call) \ - do \ - { \ - const CUresult nvbench_cuda_call_error = call; \ - if (nvbench_cuda_call_error != CUDA_SUCCESS) \ - { \ - nvbench::cuda_call::throw_error(__FILE__, \ - __LINE__, \ - #call, \ - nvbench_cuda_call_error); \ - } \ +#define NVBENCH_DRIVER_API_CALL(call) \ + do \ + { \ + const CUresult nvbench_cuda_call_error = call; \ + if (nvbench_cuda_call_error != CUDA_SUCCESS) \ + { \ + nvbench::cuda_call::throw_error(__FILE__, __LINE__, #call, nvbench_cuda_call_error); \ + } \ } while (false) /// Terminates process with failure status if `call` doesn't return /// `cudaSuccess`. -#define NVBENCH_CUDA_CALL_NOEXCEPT(call) \ - do \ - { \ - const cudaError_t nvbench_cuda_call_error = call; \ - if (nvbench_cuda_call_error != cudaSuccess) \ - { \ - nvbench::cuda_call::exit_error(__FILE__, \ - __LINE__, \ - #call, \ - nvbench_cuda_call_error); \ - } \ +#define NVBENCH_CUDA_CALL_NOEXCEPT(call) \ + do \ + { \ + const cudaError_t nvbench_cuda_call_error = call; \ + if (nvbench_cuda_call_error != cudaSuccess) \ + { \ + nvbench::cuda_call::exit_error(__FILE__, __LINE__, #call, nvbench_cuda_call_error); \ + } \ } while (false) namespace nvbench::cuda_call diff --git a/nvbench/cuda_stream.cuh b/nvbench/cuda_stream.cuh index 6674c279..2c7536c1 100644 --- a/nvbench/cuda_stream.cuh +++ b/nvbench/cuda_stream.cuh @@ -66,10 +66,10 @@ struct cuda_stream ~cuda_stream() = default; // move-only - cuda_stream(const cuda_stream &) = delete; + cuda_stream(const cuda_stream &) = delete; cuda_stream &operator=(const cuda_stream &) = delete; cuda_stream(cuda_stream &&) = default; - cuda_stream &operator=(cuda_stream &&) = default; + cuda_stream &operator=(cuda_stream &&) = default; /** * @return The `cudaStream_t` managed by this `cuda_stream`. diff --git a/nvbench/cuda_timer.cuh b/nvbench/cuda_timer.cuh index 0e022ce1..e1c6e661 100644 --- a/nvbench/cuda_timer.cuh +++ b/nvbench/cuda_timer.cuh @@ -42,10 +42,10 @@ struct cuda_timer } // move-only - cuda_timer(const cuda_timer &) = delete; - cuda_timer(cuda_timer &&) = default; + cuda_timer(const cuda_timer &) = delete; + cuda_timer(cuda_timer &&) = default; cuda_timer &operator=(const cuda_timer &) = delete; - cuda_timer &operator=(cuda_timer &&) = default; + cuda_timer &operator=(cuda_timer &&) = default; __forceinline__ void start(cudaStream_t stream) { diff --git a/nvbench/cupti_profiler.cuh b/nvbench/cupti_profiler.cuh index 6e0e255f..214706a7 100644 --- a/nvbench/cupti_profiler.cuh +++ b/nvbench/cupti_profiler.cuh @@ -21,14 +21,13 @@ #include #include +#include #include #include -#include namespace nvbench::detail { - #ifdef NVBENCH_HAS_CUPTI /** * Pass required metrics in the constructor and organize your code as follows @@ -62,7 +61,7 @@ namespace nvbench::detail */ class cupti_profiler { - bool m_available {}; + bool m_available{}; std::string m_chip_name; // Counter data @@ -87,11 +86,10 @@ public: cupti_profiler(cupti_profiler &&) noexcept; cupti_profiler &operator=(cupti_profiler &&) noexcept; - cupti_profiler(const cupti_profiler &) = delete; + cupti_profiler(const cupti_profiler &) = delete; cupti_profiler &operator=(const cupti_profiler &) = delete; - cupti_profiler(nvbench::device_info device, - std::vector &&metric_names); + cupti_profiler(nvbench::device_info device, std::vector &&metric_names); ~cupti_profiler(); [[nodiscard]] bool is_initialized() const; @@ -125,5 +123,4 @@ private: }; #endif - } // namespace nvbench::detail diff --git a/nvbench/cupti_profiler.cxx b/nvbench/cupti_profiler.cxx index 6dcd81d7..6233ef0e 100644 --- a/nvbench/cupti_profiler.cxx +++ b/nvbench/cupti_profiler.cxx @@ -31,6 +31,7 @@ #include #include +#include namespace nvbench::detail { @@ -53,14 +54,13 @@ void nvpw_call(const NVPA_Status status) { if (status != NVPA_STATUS_SUCCESS) { - NVBENCH_THROW(std::runtime_error, "NVPW call returned error: {}", status); + NVBENCH_THROW(std::runtime_error, "NVPW call returned error: {}", static_cast>(status)); } } } // namespace -cupti_profiler::cupti_profiler(nvbench::device_info device, - std::vector &&metric_names) +cupti_profiler::cupti_profiler(nvbench::device_info device, std::vector &&metric_names) : m_metric_names(metric_names) , m_device(device) { @@ -154,12 +154,10 @@ class eval_request NVPW_MetricsEvaluator *evaluator_ptr; public: - eval_request(NVPW_MetricsEvaluator *evaluator_ptr, - const std::string &metric_name) + eval_request(NVPW_MetricsEvaluator *evaluator_ptr, const std::string &metric_name) : evaluator_ptr(evaluator_ptr) { - NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params params = - {}; + NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params params = {}; params.structSize = NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params_STRUCT_SIZE; @@ -168,8 +166,7 @@ class eval_request params.pMetricEvalRequest = &request; params.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE; - nvpw_call( - NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest(¶ms)); + nvpw_call(NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest(¶ms)); } [[nodiscard]] std::vector get_raw_dependencies() @@ -178,10 +175,9 @@ class eval_request NVPW_MetricsEvaluator_GetMetricRawDependencies_Params params{}; - params.structSize = - NVPW_MetricsEvaluator_GetMetricRawDependencies_Params_STRUCT_SIZE; - params.pMetricsEvaluator = evaluator_ptr; - params.pMetricEvalRequests = &request; + params.structSize = NVPW_MetricsEvaluator_GetMetricRawDependencies_Params_STRUCT_SIZE; + params.pMetricsEvaluator = evaluator_ptr; + params.pMetricEvalRequests = &request; params.numMetricEvalRequests = 1; params.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE; params.metricEvalRequestStrideSize = sizeof(NVPW_MetricEvalRequest); @@ -211,26 +207,23 @@ class metric_evaluator const std::uint8_t *counter_data_image = nullptr, const std::size_t counter_data_image_size = 0) { - NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params - scratch_buffer_param{}; + NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params scratch_buffer_param{}; scratch_buffer_param.structSize = NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE; scratch_buffer_param.pChipName = chip_name.c_str(); scratch_buffer_param.pCounterAvailabilityImage = counter_availability_image; - nvpw_call(NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize( - &scratch_buffer_param)); + nvpw_call(NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize(&scratch_buffer_param)); scratch_buffer.resize(scratch_buffer_param.scratchBufferSize); NVPW_CUDA_MetricsEvaluator_Initialize_Params evaluator_params{}; - evaluator_params.structSize = - NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE; - evaluator_params.scratchBufferSize = scratch_buffer.size(); - evaluator_params.pScratchBuffer = scratch_buffer.data(); - evaluator_params.pChipName = chip_name.c_str(); + evaluator_params.structSize = NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE; + evaluator_params.scratchBufferSize = scratch_buffer.size(); + evaluator_params.pScratchBuffer = scratch_buffer.data(); + evaluator_params.pChipName = chip_name.c_str(); evaluator_params.pCounterAvailabilityImage = counter_availability_image; evaluator_params.pCounterDataImage = counter_data_image; evaluator_params.counterDataImageSize = counter_data_image_size; @@ -247,7 +240,7 @@ class metric_evaluator { NVPW_MetricsEvaluator_Destroy_Params params{}; - params.structSize = NVPW_MetricsEvaluator_Destroy_Params_STRUCT_SIZE; + params.structSize = NVPW_MetricsEvaluator_Destroy_Params_STRUCT_SIZE; params.pMetricsEvaluator = evaluator_ptr; nvpw_call(NVPW_MetricsEvaluator_Destroy(¶ms)); @@ -259,10 +252,7 @@ class metric_evaluator return {evaluator_ptr, metric_name}; } - [[nodiscard]] operator NVPW_MetricsEvaluator *() const - { - return evaluator_ptr; - } + [[nodiscard]] operator NVPW_MetricsEvaluator *() const { return evaluator_ptr; } }; } // namespace @@ -270,10 +260,10 @@ class metric_evaluator namespace { -[[nodiscard]] std::vector get_raw_metric_requests( - const std::string &chip_name, - const std::vector &metric_names, - const std::uint8_t *counter_availability_image = nullptr) +[[nodiscard]] std::vector +get_raw_metric_requests(const std::string &chip_name, + const std::vector &metric_names, + const std::uint8_t *counter_availability_image = nullptr) { metric_evaluator evaluator(chip_name, counter_availability_image); @@ -282,8 +272,7 @@ namespace for (auto &metric_name : metric_names) { - for (auto &raw_dependency : - evaluator.create_request(metric_name).get_raw_dependencies()) + for (auto &raw_dependency : evaluator.create_request(metric_name).get_raw_dependencies()) { raw_metric_names.push_back(raw_dependency); } @@ -295,10 +284,10 @@ namespace for (auto &raw_name : raw_metric_names) { NVPA_RawMetricRequest metricRequest{}; - metricRequest.structSize = NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE; - metricRequest.pMetricName = raw_name; - metricRequest.isolated = true; - metricRequest.keepInstances = true; + metricRequest.structSize = NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE; + metricRequest.pMetricName = raw_name; + metricRequest.isolated = true; + metricRequest.keepInstances = true; raw_requests.push_back(metricRequest); } @@ -309,12 +298,11 @@ class metrics_config { bool initialized{}; - void create(const std::string &chip_name, - const std::uint8_t *availability_image) + void create(const std::string &chip_name, const std::uint8_t *availability_image) { NVPW_CUDA_RawMetricsConfig_Create_V2_Params params{}; - params.structSize = NVPW_CUDA_RawMetricsConfig_Create_V2_Params_STRUCT_SIZE; + params.structSize = NVPW_CUDA_RawMetricsConfig_Create_V2_Params_STRUCT_SIZE; params.activityKind = NVPA_ACTIVITY_KIND_PROFILER; params.pChipName = chip_name.c_str(); params.pCounterAvailabilityImage = availability_image; @@ -329,9 +317,8 @@ class metrics_config { NVPW_RawMetricsConfig_SetCounterAvailability_Params params{}; - params.structSize = - NVPW_RawMetricsConfig_SetCounterAvailability_Params_STRUCT_SIZE; - params.pRawMetricsConfig = raw_metrics_config; + params.structSize = NVPW_RawMetricsConfig_SetCounterAvailability_Params_STRUCT_SIZE; + params.pRawMetricsConfig = raw_metrics_config; params.pCounterAvailabilityImage = availability_image; nvpw_call(NVPW_RawMetricsConfig_SetCounterAvailability(¶ms)); @@ -341,7 +328,7 @@ class metrics_config { NVPW_RawMetricsConfig_BeginPassGroup_Params params{}; - params.structSize = NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE; + params.structSize = NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE; params.pRawMetricsConfig = raw_metrics_config; nvpw_call(NVPW_RawMetricsConfig_BeginPassGroup(¶ms)); @@ -351,7 +338,7 @@ class metrics_config { NVPW_RawMetricsConfig_AddMetrics_Params params{}; - params.structSize = NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE; + params.structSize = NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE; params.pRawMetricsConfig = raw_metrics_config; params.pRawMetricRequests = raw_metric_requests.data(); params.numMetricRequests = raw_metric_requests.size(); @@ -363,7 +350,7 @@ class metrics_config { NVPW_RawMetricsConfig_EndPassGroup_Params params{}; - params.structSize = NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE; + params.structSize = NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE; params.pRawMetricsConfig = raw_metrics_config; nvpw_call(NVPW_RawMetricsConfig_EndPassGroup(¶ms)); @@ -373,8 +360,7 @@ class metrics_config { NVPW_RawMetricsConfig_GenerateConfigImage_Params params{}; - params.structSize = - NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE; + params.structSize = NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE; params.pRawMetricsConfig = raw_metrics_config; nvpw_call(NVPW_RawMetricsConfig_GenerateConfigImage(¶ms)); @@ -398,7 +384,7 @@ class metrics_config { NVPW_RawMetricsConfig_GetConfigImage_Params params{}; - params.structSize = NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE; + params.structSize = NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE; params.pRawMetricsConfig = raw_metrics_config; params.bytesAllocated = 0; params.pBuffer = nullptr; @@ -419,7 +405,7 @@ class metrics_config { NVPW_RawMetricsConfig_Destroy_Params params{}; - params.structSize = NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE; + params.structSize = NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE; params.pRawMetricsConfig = raw_metrics_config; NVPW_RawMetricsConfig_Destroy(¶ms); @@ -433,13 +419,12 @@ class metrics_config void cupti_profiler::initialize_config_image() { - m_config_image = - metrics_config(m_chip_name, - get_raw_metric_requests(m_chip_name, - m_metric_names, - m_availability_image.data()), - m_availability_image.data()) - .get_config_image(); + m_config_image = metrics_config(m_chip_name, + get_raw_metric_requests(m_chip_name, + m_metric_names, + m_availability_image.data()), + m_availability_image.data()) + .get_config_image(); } namespace @@ -450,12 +435,11 @@ class counter_data_builder bool initialized{}; public: - counter_data_builder(const std::string &chip_name, - const std::uint8_t *pCounterAvailabilityImage) + counter_data_builder(const std::string &chip_name, const std::uint8_t *pCounterAvailabilityImage) { NVPW_CUDA_CounterDataBuilder_Create_Params params{}; - params.structSize = NVPW_CUDA_CounterDataBuilder_Create_Params_STRUCT_SIZE; + params.structSize = NVPW_CUDA_CounterDataBuilder_Create_Params_STRUCT_SIZE; params.pChipName = chip_name.c_str(); params.pCounterAvailabilityImage = pCounterAvailabilityImage; @@ -471,7 +455,7 @@ class counter_data_builder { NVPW_CounterDataBuilder_Destroy_Params params{}; - params.structSize = NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE; + params.structSize = NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE; params.pCounterDataBuilder = builder; NVPW_CounterDataBuilder_Destroy(¶ms); @@ -488,16 +472,14 @@ void cupti_profiler::initialize_counter_data_prefix_image() const std::uint8_t *counter_availability_image = nullptr; std::vector raw_metric_requests = - get_raw_metric_requests(m_chip_name, - m_metric_names, - counter_availability_image); + get_raw_metric_requests(m_chip_name, m_metric_names, counter_availability_image); counter_data_builder data_builder(m_chip_name, counter_availability_image); { NVPW_CounterDataBuilder_AddMetrics_Params params{}; - params.structSize = NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE; + params.structSize = NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE; params.pCounterDataBuilder = data_builder.builder; params.pRawMetricRequests = raw_metric_requests.data(); params.numMetricRequests = raw_metric_requests.size(); @@ -508,8 +490,7 @@ void cupti_profiler::initialize_counter_data_prefix_image() { NVPW_CounterDataBuilder_GetCounterDataPrefix_Params params{}; - params.structSize = - NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE; + params.structSize = NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE; params.pCounterDataBuilder = data_builder.builder; params.bytesAllocated = 0; params.pBuffer = nullptr; @@ -532,11 +513,9 @@ get_counter_data_image_size(CUpti_Profiler_CounterDataImageOptions *options) { CUpti_Profiler_CounterDataImage_CalculateSize_Params params{}; - params.structSize = - CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE; - params.pOptions = options; - params.sizeofCounterDataImageOptions = - CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE; + params.structSize = CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE; + params.pOptions = options; + params.sizeofCounterDataImageOptions = CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE; cupti_call(cuptiProfilerCounterDataImageCalculateSize(¶ms)); return params.counterDataImageSize; @@ -559,12 +538,10 @@ void cupti_profiler::initialize_counter_data_image() { CUpti_Profiler_CounterDataImage_Initialize_Params params{}; - params.structSize = - CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE; - params.sizeofCounterDataImageOptions = - CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE; - params.pOptions = &counter_data_image_options; - params.counterDataImageSize = m_data_image.size(); + params.structSize = CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE; + params.sizeofCounterDataImageOptions = CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE; + params.pOptions = &counter_data_image_options; + params.counterDataImageSize = m_data_image.size(); params.pCounterDataImage = &m_data_image[0]; cupti_call(cuptiProfilerCounterDataImageInitialize(¶ms)); @@ -578,8 +555,7 @@ void cupti_profiler::initialize_counter_data_image() params.counterDataImageSize = m_data_image.size(); params.pCounterDataImage = &m_data_image[0]; - cupti_call( - cuptiProfilerCounterDataImageCalculateScratchBufferSize(¶ms)); + cupti_call(cuptiProfilerCounterDataImageCalculateScratchBufferSize(¶ms)); m_data_scratch_buffer.resize(params.counterDataScratchBufferSize); } @@ -587,8 +563,7 @@ void cupti_profiler::initialize_counter_data_image() { CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params params{}; - params.structSize = - CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE; + params.structSize = CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE; params.counterDataImageSize = m_data_image.size(); params.pCounterDataImage = &m_data_image[0]; params.counterDataScratchBufferSize = m_data_scratch_buffer.size(); @@ -608,17 +583,14 @@ cupti_profiler::~cupti_profiler() } } -bool cupti_profiler::is_initialized() const -{ - return m_available; -} +bool cupti_profiler::is_initialized() const { return m_available; } void cupti_profiler::prepare_user_loop() { { CUpti_Profiler_BeginSession_Params params{}; - params.structSize = CUpti_Profiler_BeginSession_Params_STRUCT_SIZE; + params.structSize = CUpti_Profiler_BeginSession_Params_STRUCT_SIZE; params.ctx = nullptr; params.counterDataImageSize = m_data_image.size(); params.pCounterDataImage = &m_data_image[0]; @@ -735,9 +707,7 @@ std::vector cupti_profiler::get_counter_values() if (params.numRanges != 1) { - NVBENCH_THROW(std::runtime_error, - "{}", - "Something's gone wrong, one range is expected"); + NVBENCH_THROW(std::runtime_error, "{}", "Something's gone wrong, one range is expected"); } } @@ -752,8 +722,7 @@ std::vector cupti_profiler::get_counter_values() { NVPW_MetricsEvaluator_SetDeviceAttributes_Params params{}; - params.structSize = - NVPW_MetricsEvaluator_SetDeviceAttributes_Params_STRUCT_SIZE; + params.structSize = NVPW_MetricsEvaluator_SetDeviceAttributes_Params_STRUCT_SIZE; params.pMetricsEvaluator = evaluator; params.pCounterDataImage = m_data_image.data(); params.counterDataImageSize = m_data_image.size(); @@ -764,11 +733,10 @@ std::vector cupti_profiler::get_counter_values() { NVPW_MetricsEvaluator_EvaluateToGpuValues_Params params{}; - params.structSize = - NVPW_MetricsEvaluator_EvaluateToGpuValues_Params_STRUCT_SIZE; - params.pMetricsEvaluator = evaluator; - params.pMetricEvalRequests = &request.request; - params.numMetricEvalRequests = 1; + params.structSize = NVPW_MetricsEvaluator_EvaluateToGpuValues_Params_STRUCT_SIZE; + params.pMetricsEvaluator = evaluator; + params.pMetricEvalRequests = &request.request; + params.numMetricEvalRequests = 1; params.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE; params.metricEvalRequestStrideSize = sizeof(NVPW_MetricEvalRequest); params.pCounterDataImage = m_data_image.data(); diff --git a/nvbench/detail/device_scope.cuh b/nvbench/detail/device_scope.cuh index de3a55a3..c924beed 100644 --- a/nvbench/detail/device_scope.cuh +++ b/nvbench/detail/device_scope.cuh @@ -39,9 +39,9 @@ struct [[maybe_unused]] device_scope ~device_scope() { NVBENCH_CUDA_CALL(cudaSetDevice(m_old_device_id)); } // move-only - device_scope(device_scope &&) = default; - device_scope &operator=(device_scope &&) = default; - device_scope(const device_scope &) = delete; + device_scope(device_scope &&) = default; + device_scope &operator=(device_scope &&) = default; + device_scope(const device_scope &) = delete; device_scope &operator=(const device_scope &) = delete; private: diff --git a/nvbench/detail/entropy_criterion.cuh b/nvbench/detail/entropy_criterion.cuh new file mode 100644 index 00000000..b0e4ebe0 --- /dev/null +++ b/nvbench/detail/entropy_criterion.cuh @@ -0,0 +1,55 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include + +namespace nvbench::detail +{ + +class entropy_criterion final : public stopping_criterion_base +{ + // state + nvbench::int64_t m_total_samples{}; + nvbench::float64_t m_total_cuda_time{}; + std::vector> m_freq_tracker; + + // TODO The window size should be user-configurable + nvbench::detail::ring_buffer m_entropy_tracker{299}; + + // Used to avoid re-allocating temporary memory + std::vector m_probabilities; + + nvbench::float64_t compute_entropy(); + +public: + entropy_criterion(); + +protected: + virtual void do_initialize() override; + virtual void do_add_measurement(nvbench::float64_t measurement) override; + virtual bool do_is_finished() override; + +}; + +} // namespace nvbench::detail diff --git a/nvbench/detail/entropy_criterion.cxx b/nvbench/detail/entropy_criterion.cxx new file mode 100644 index 00000000..6d9ba8cd --- /dev/null +++ b/nvbench/detail/entropy_criterion.cxx @@ -0,0 +1,137 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + + +namespace nvbench::detail +{ + +entropy_criterion::entropy_criterion() + : stopping_criterion_base{"entropy", {{"max-angle", 0.048}, {"min-r2", 0.36}}} +{ + m_freq_tracker.reserve(m_entropy_tracker.capacity() * 2); + m_probabilities.reserve(m_entropy_tracker.capacity() * 2); +} + +void entropy_criterion::do_initialize() +{ + m_total_samples = 0; + m_total_cuda_time = 0.0; + m_entropy_tracker.clear(); + m_freq_tracker.clear(); +} + +nvbench::float64_t entropy_criterion::compute_entropy() +{ + const std::size_t n = m_freq_tracker.size(); + if (n == 0) + { + return 0.0; + } + + m_probabilities.resize(n); + for (std::size_t i = 0; i < n; i++) + { + m_probabilities[i] = static_cast(m_freq_tracker[i].second) / + static_cast(m_total_samples); + } + + nvbench::float64_t entropy{}; + for (nvbench::float64_t p : m_probabilities) + { + entropy -= p * std::log2(p); + } + + return entropy; +} + +void entropy_criterion::do_add_measurement(nvbench::float64_t measurement) +{ + m_total_samples++; + m_total_cuda_time += measurement; + + { + auto key = measurement; + constexpr bool bin_keys = false; + + if (bin_keys) + { + const auto resolution_us = 0.5; + const auto resulution_s = resolution_us / 1'000'000; + const auto epsilon = resulution_s * 2; + key = std::round(key / epsilon) * epsilon; + } + + // This approach is about 3x faster than `std::{unordered_,}map` + // Up to 100k samples, only about 20% slower than corresponding stdrel method + auto it = std::lower_bound(m_freq_tracker.begin(), + m_freq_tracker.end(), + std::make_pair(key, nvbench::int64_t{})); + + if (it != m_freq_tracker.end() && it->first == key) + { + it->second += 1; + } + else + { + m_freq_tracker.insert(it, std::make_pair(key, nvbench::int64_t{1})); + } + } + + m_entropy_tracker.push_back(compute_entropy()); +} + +bool entropy_criterion::do_is_finished() +{ + if (m_entropy_tracker.size() < 2) + { + return false; + } + + // Even number of samples is used to reduce the overhead and not required to compute entropy. + // This makes `is_finished()` about 20% faster than corresponding stdrel method. + if (m_total_samples % 2 != 0) + { + return false; + } + + auto begin = m_entropy_tracker.cbegin(); + auto end = m_entropy_tracker.cend(); + auto mean = statistics::compute_mean(begin, end); + + const auto [slope, intercept] = statistics::compute_linear_regression(begin, end, mean); + + if (statistics::slope2deg(slope) > m_params.get_float64("max-angle")) + { + return false; + } + + const auto r2 = statistics::compute_r2(begin, end, mean, slope, intercept); + if (r2 < m_params.get_float64("min-r2")) + { + return false; + } + + return true; +} + +} // namespace nvbench::detail diff --git a/nvbench/detail/kernel_launcher_timer_wrapper.cuh b/nvbench/detail/kernel_launcher_timer_wrapper.cuh index 39a999ec..1efdf6e7 100644 --- a/nvbench/detail/kernel_launcher_timer_wrapper.cuh +++ b/nvbench/detail/kernel_launcher_timer_wrapper.cuh @@ -33,7 +33,7 @@ namespace detail template struct kernel_launch_timer_wrapper { - explicit kernel_launch_timer_wrapper(KernelLauncher &launcher) + explicit kernel_launch_timer_wrapper(KernelLauncher &launcher) : m_kernel_launcher{launcher} {} diff --git a/nvbench/detail/l2flush.cuh b/nvbench/detail/l2flush.cuh index 0e33f7e4..f85b3e79 100644 --- a/nvbench/detail/l2flush.cuh +++ b/nvbench/detail/l2flush.cuh @@ -31,13 +31,12 @@ struct l2flush { int dev_id{}; NVBENCH_CUDA_CALL(cudaGetDevice(&dev_id)); - NVBENCH_CUDA_CALL( - cudaDeviceGetAttribute(&m_l2_size, cudaDevAttrL2CacheSize, dev_id)); + NVBENCH_CUDA_CALL(cudaDeviceGetAttribute(&m_l2_size, cudaDevAttrL2CacheSize, dev_id)); if (m_l2_size > 0) { - void* buffer = m_l2_buffer; - NVBENCH_CUDA_CALL(cudaMalloc(&buffer, m_l2_size)); - m_l2_buffer = reinterpret_cast(buffer); + void *buffer = m_l2_buffer; + NVBENCH_CUDA_CALL(cudaMalloc(&buffer, static_cast(m_l2_size))); + m_l2_buffer = reinterpret_cast(buffer); } } @@ -53,7 +52,8 @@ struct l2flush { if (m_l2_size > 0) { - NVBENCH_CUDA_CALL(cudaMemsetAsync(m_l2_buffer, 0, m_l2_size, stream)); + NVBENCH_CUDA_CALL( + cudaMemsetAsync(m_l2_buffer, 0, static_cast(m_l2_size), stream)); } } diff --git a/nvbench/detail/measure_cold.cu b/nvbench/detail/measure_cold.cu index 380d2cd1..3b415fb9 100644 --- a/nvbench/detail/measure_cold.cu +++ b/nvbench/detail/measure_cold.cu @@ -16,53 +16,48 @@ * limitations under the License. */ -#include - #include +#include +#include +#include #include #include #include #include -#include -#include - #include -#include -#include -#include -#include - namespace nvbench::detail { measure_cold_base::measure_cold_base(state &exec_state) : m_state{exec_state} , m_launch{m_state.get_cuda_stream()} + , m_criterion_params{exec_state.get_criterion_params()} + , m_stopping_criterion{nvbench::criterion_manager::get().get_criterion(exec_state.get_stopping_criterion())} , m_run_once{exec_state.get_run_once()} , m_no_block{exec_state.get_disable_blocking_kernel()} , m_min_samples{exec_state.get_min_samples()} - , m_max_noise{exec_state.get_max_noise()} - , m_min_time{exec_state.get_min_time()} , m_skip_time{exec_state.get_skip_time()} , m_timeout{exec_state.get_timeout()} -{} +{ + if (m_min_samples > 0) + { + m_cuda_times.reserve(static_cast(m_min_samples)); + m_cpu_times.reserve(static_cast(m_min_samples)); + } +} void measure_cold_base::check() { const auto device = m_state.get_device(); if (!device) { - NVBENCH_THROW(std::runtime_error, - "{}", - "Device required for `cold` measurement."); + NVBENCH_THROW(std::runtime_error, "{}", "Device required for `cold` measurement."); } if (!device->is_active()) { // This means something went wrong higher up. Throw an error. - NVBENCH_THROW(std::runtime_error, - "{}", - "Internal error: Current device is not active."); + NVBENCH_THROW(std::runtime_error, "{}", "Internal error: Current device is not active."); } } @@ -72,10 +67,11 @@ void measure_cold_base::initialize() m_total_cpu_time = 0.; m_cpu_noise = 0.; m_total_samples = 0; - m_noise_tracker.clear(); m_cuda_times.clear(); m_cpu_times.clear(); m_max_time_exceeded = false; + + m_stopping_criterion.initialize(m_criterion_params); } void measure_cold_base::run_trials_prologue() { m_walltime_timer.start(); } @@ -91,18 +87,7 @@ void measure_cold_base::record_measurements() m_total_cpu_time += cur_cpu_time; ++m_total_samples; - // Compute convergence statistics using CUDA timings: - const auto mean_cuda_time = m_total_cuda_time / - static_cast(m_total_samples); - const auto cuda_stdev = - nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(), - m_cuda_times.cend(), - mean_cuda_time); - auto cuda_rel_stdev = cuda_stdev / mean_cuda_time; - if (std::isfinite(cuda_rel_stdev)) - { - m_noise_tracker.push_back(cuda_rel_stdev); - } + m_stopping_criterion.add_measurement(cur_cuda_time); } bool measure_cold_base::is_finished() @@ -113,39 +98,12 @@ bool measure_cold_base::is_finished() } // Check that we've gathered enough samples: - if (m_total_cuda_time > m_min_time && m_total_samples > m_min_samples) + if (m_total_samples > m_min_samples) { - // Noise has dropped below threshold - if (m_noise_tracker.back() < m_max_noise) + if (m_stopping_criterion.is_finished()) { return true; } - - // Check if the noise (cuda rel stdev) has converged by inspecting a - // trailing window of recorded noise measurements. - // This helps identify benchmarks that are inherently noisy and would - // never converge to the target stdev threshold. This check ensures that the - // benchmark will end if the stdev stabilizes above the target threshold. - // Gather some iterations before checking noise, and limit how often we - // check this. - if (m_noise_tracker.size() > 64 && (m_total_samples % 16 == 0)) - { - // Use the current noise as the stdev reference. - const auto current_noise = m_noise_tracker.back(); - const auto noise_stdev = nvbench::detail::statistics::standard_deviation( - m_noise_tracker.cbegin(), - m_noise_tracker.cend(), - current_noise); - const auto noise_rel_stdev = noise_stdev / current_noise; - - // If the rel stdev of the last N cuda noise measurements is less than - // 5%, consider the result stable. - const auto noise_threshold = 0.05; - if (noise_rel_stdev < noise_threshold) - { - return true; - } - } } // Check for timeouts: @@ -162,13 +120,11 @@ bool measure_cold_base::is_finished() void measure_cold_base::run_trials_epilogue() { // Only need to compute this at the end, not per iteration. - const auto cpu_mean = m_total_cuda_time / - static_cast(m_total_samples); - const auto cpu_stdev = - nvbench::detail::statistics::standard_deviation(m_cpu_times.cbegin(), - m_cpu_times.cend(), - cpu_mean); - m_cpu_noise = cpu_stdev / cpu_mean; + const auto cpu_mean = m_total_cuda_time / static_cast(m_total_samples); + const auto cpu_stdev = nvbench::detail::statistics::standard_deviation(m_cpu_times.cbegin(), + m_cpu_times.cend(), + cpu_mean); + m_cpu_noise = cpu_stdev / cpu_mean; m_walltime_timer.stop(); } @@ -199,8 +155,7 @@ void measure_cold_base::generate_summaries() auto &summ = m_state.add_summary("nv/cold/time/cpu/stdev/relative"); summ.set_string("name", "Noise"); summ.set_string("hint", "percentage"); - summ.set_string("description", - "Relative standard deviation of isolated CPU times"); + summ.set_string("description", "Relative standard deviation of isolated CPU times"); summ.set_float64("value", m_cpu_noise); } @@ -215,16 +170,21 @@ void measure_cold_base::generate_summaries() summ.set_float64("value", avg_cuda_time); } + const auto mean_cuda_time = m_total_cuda_time / static_cast(m_total_samples); + const auto cuda_stdev = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(), + m_cuda_times.cend(), + mean_cuda_time); + const auto cuda_rel_stdev = cuda_stdev / mean_cuda_time; + const auto noise = cuda_rel_stdev; + const auto max_noise = m_criterion_params.get_float64("max-noise"); + const auto min_time = m_criterion_params.get_float64("min-time"); + { auto &summ = m_state.add_summary("nv/cold/time/gpu/stdev/relative"); summ.set_string("name", "Noise"); summ.set_string("hint", "percentage"); - summ.set_string("description", - "Relative standard deviation of isolated GPU times"); - summ.set_float64("value", - m_noise_tracker.empty() - ? std::numeric_limits::infinity() - : m_noise_tracker.back()); + summ.set_string("description", "Relative standard deviation of isolated GPU times"); + summ.set_float64("value", noise); } if (const auto items = m_state.get_element_count(); items != 0) @@ -232,8 +192,7 @@ void measure_cold_base::generate_summaries() auto &summ = m_state.add_summary("nv/cold/bw/item_rate"); summ.set_string("name", "Elem/s"); summ.set_string("hint", "item_rate"); - summ.set_string("description", - "Number of input elements processed per second"); + summ.set_string("description", "Number of input elements processed per second"); summ.set_float64("value", static_cast(items) / avg_cuda_time); } @@ -251,8 +210,8 @@ void measure_cold_base::generate_summaries() } { - const auto peak_gmem_bw = static_cast( - m_state.get_device()->get_global_memory_bus_bandwidth()); + const auto peak_gmem_bw = + static_cast(m_state.get_device()->get_global_memory_bus_bandwidth()); auto &summ = m_state.add_summary("nv/cold/bw/global/utilization"); summ.set_string("name", "BWUtil"); @@ -274,8 +233,7 @@ void measure_cold_base::generate_summaries() } // Log if a printer exists: - if (auto printer_opt_ref = m_state.get_benchmark().get_printer(); - printer_opt_ref.has_value()) + if (auto printer_opt_ref = m_state.get_benchmark().get_printer(); printer_opt_ref.has_value()) { auto &printer = printer_opt_ref.value().get(); @@ -283,15 +241,15 @@ void measure_cold_base::generate_summaries() { const auto timeout = m_walltime_timer.get_duration(); - if (!m_noise_tracker.empty() && m_noise_tracker.back() > m_max_noise) + if (noise > max_noise) { printer.log(nvbench::log_level::warn, fmt::format("Current measurement timed out ({:0.2f}s) " "while over noise threshold ({:0.2f}% > " "{:0.2f}%)", timeout, - m_noise_tracker.back() * 100, - m_max_noise * 100)); + noise * 100, + max_noise * 100)); } if (m_total_samples < m_min_samples) { @@ -302,7 +260,7 @@ void measure_cold_base::generate_summaries() m_total_samples, m_min_samples)); } - if (m_total_cuda_time < m_min_time) + if (m_total_cuda_time < min_time) { printer.log(nvbench::log_level::warn, fmt::format("Current measurement timed out ({:0.2f}s) " @@ -310,7 +268,7 @@ void measure_cold_base::generate_summaries() "{:0.2f}s)", timeout, m_total_cuda_time, - m_min_time)); + min_time)); } } @@ -324,10 +282,7 @@ void measure_cold_base::generate_summaries() m_walltime_timer.get_duration(), m_total_samples)); - printer.process_bulk_data(m_state, - "nv/cold/sample_times", - "sample_times", - m_cuda_times); + printer.process_bulk_data(m_state, "nv/cold/sample_times", "sample_times", m_cuda_times); } } diff --git a/nvbench/detail/measure_cold.cuh b/nvbench/detail/measure_cold.cuh index 0cab36ae..2b0183f5 100644 --- a/nvbench/detail/measure_cold.cuh +++ b/nvbench/detail/measure_cold.cuh @@ -25,15 +25,14 @@ #include #include #include +#include #include #include -#include #include #include -#include #include #include @@ -49,10 +48,10 @@ namespace detail struct measure_cold_base { explicit measure_cold_base(nvbench::state &exec_state); - measure_cold_base(const measure_cold_base &) = delete; - measure_cold_base(measure_cold_base &&) = delete; + measure_cold_base(const measure_cold_base &) = delete; + measure_cold_base(measure_cold_base &&) = delete; measure_cold_base &operator=(const measure_cold_base &) = delete; - measure_cold_base &operator=(measure_cold_base &&) = delete; + measure_cold_base &operator=(measure_cold_base &&) = delete; protected: template @@ -68,10 +67,7 @@ protected: void check_skip_time(nvbench::float64_t warmup_time); - __forceinline__ void flush_device_l2() - { - m_l2flush.flush(m_launch.get_stream()); - } + __forceinline__ void flush_device_l2() { m_l2flush.flush(m_launch.get_stream()); } __forceinline__ void sync_stream() const { @@ -90,12 +86,13 @@ protected: nvbench::detail::l2flush m_l2flush; nvbench::blocking_kernel m_blocker; + nvbench::criterion_params m_criterion_params; + nvbench::stopping_criterion_base& m_stopping_criterion; + bool m_run_once{false}; bool m_no_block{false}; nvbench::int64_t m_min_samples{}; - nvbench::float64_t m_max_noise{}; // rel stdev - nvbench::float64_t m_min_time{}; nvbench::float64_t m_skip_time{}; nvbench::float64_t m_timeout{}; @@ -105,9 +102,6 @@ protected: nvbench::float64_t m_total_cpu_time{}; nvbench::float64_t m_cpu_noise{}; // rel stdev - // Trailing history of noise measurements for convergence tests - nvbench::detail::ring_buffer m_noise_tracker{512}; - std::vector m_cuda_times; std::vector m_cpu_times; diff --git a/nvbench/detail/measure_cupti.cu b/nvbench/detail/measure_cupti.cu index 9e8de6c3..e583cd54 100644 --- a/nvbench/detail/measure_cupti.cu +++ b/nvbench/detail/measure_cupti.cu @@ -50,8 +50,7 @@ struct metric_traits; template <> struct metric_traits { - static constexpr const char *metric_name = - "dram__throughput.avg.pct_of_peak_sustained_elapsed"; + static constexpr const char *metric_name = "dram__throughput.avg.pct_of_peak_sustained_elapsed"; static constexpr const char *name = "HBWPeak"; static constexpr const char *hint = "percentage"; @@ -119,10 +118,7 @@ struct metric_traits static constexpr const char *description = "Hit rate at L1 cache."; static constexpr double divider = 100.0; - static bool is_collected(nvbench::state &m_state) - { - return m_state.is_l1_hit_rate_collected(); - }; + static bool is_collected(nvbench::state &m_state) { return m_state.is_l1_hit_rate_collected(); }; }; template <> @@ -134,10 +130,7 @@ struct metric_traits static constexpr const char *description = "Hit rate at L2 cache."; static constexpr double divider = 100.0; - static bool is_collected(nvbench::state &m_state) - { - return m_state.is_l2_hit_rate_collected(); - }; + static bool is_collected(nvbench::state &m_state) { return m_state.is_l2_hit_rate_collected(); }; }; template @@ -153,8 +146,7 @@ void add_metrics_impl(nvbench::state &state, std::vector &metrics) } template <> -void add_metrics_impl(nvbench::state &, - std::vector &) +void add_metrics_impl(nvbench::state &, std::vector &) {} std::vector add_metrics(nvbench::state &state) @@ -179,13 +171,11 @@ try // clang-format on catch (const std::exception &ex) { - if (auto printer_opt_ref = exec_state.get_benchmark().get_printer(); - printer_opt_ref) + if (auto printer_opt_ref = exec_state.get_benchmark().get_printer(); printer_opt_ref) { auto &printer = printer_opt_ref.value().get(); printer.log(nvbench::log_level::warn, - fmt::format("CUPTI failed to construct profiler: {}", - ex.what())); + fmt::format("CUPTI failed to construct profiler: {}", ex.what())); } } @@ -194,15 +184,11 @@ void measure_cupti_base::check() const auto device = m_state.get_device(); if (!device) { - NVBENCH_THROW(std::runtime_error, - "{}", - "Device required for `cupti` measurement."); + NVBENCH_THROW(std::runtime_error, "{}", "Device required for `cupti` measurement."); } if (!device->is_active()) { // This means something went wrong higher up. Throw an error. - NVBENCH_THROW(std::runtime_error, - "{}", - "Internal error: Current device is not active."); + NVBENCH_THROW(std::runtime_error, "{}", "Internal error: Current device is not active."); } } @@ -210,16 +196,13 @@ namespace { template -void gen_summary(std::size_t result_id, - nvbench::state &m_state, - const std::vector &result) +void gen_summary(std::size_t result_id, nvbench::state &m_state, const std::vector &result) { using metric = metric_traits; if (metric::is_collected(m_state)) { - auto &summ = - m_state.add_summary(fmt::format("nv/cupti/{}", metric::metric_name)); + auto &summ = m_state.add_summary(fmt::format("nv/cupti/{}", metric::metric_name)); summ.set_string("name", metric::name); summ.set_string("hint", metric::hint); summ.set_string("description", metric::description); @@ -231,9 +214,7 @@ void gen_summary(std::size_t result_id, } template <> -void gen_summary(std::size_t, - nvbench::state &, - const std::vector &) +void gen_summary(std::size_t, nvbench::state &, const std::vector &) {} void gen_summaries(nvbench::state &state, const std::vector &result) @@ -266,8 +247,7 @@ try } // Log if a printer exists: - if (auto printer_opt_ref = m_state.get_benchmark().get_printer(); - printer_opt_ref.has_value()) + if (auto printer_opt_ref = m_state.get_benchmark().get_printer(); printer_opt_ref.has_value()) { auto &printer = printer_opt_ref.value().get(); printer.log(nvbench::log_level::pass, @@ -278,13 +258,11 @@ try } catch (const std::exception &ex) { - if (auto printer_opt_ref = m_state.get_benchmark().get_printer(); - printer_opt_ref) + if (auto printer_opt_ref = m_state.get_benchmark().get_printer(); printer_opt_ref) { auto &printer = printer_opt_ref.value().get(); printer.log(nvbench::log_level::warn, - fmt::format("CUPTI failed to generate the summary: {}", - ex.what())); + fmt::format("CUPTI failed to generate the summary: {}", ex.what())); } } diff --git a/nvbench/detail/measure_cupti.cuh b/nvbench/detail/measure_cupti.cuh index 736c3754..ec7b2120 100644 --- a/nvbench/detail/measure_cupti.cuh +++ b/nvbench/detail/measure_cupti.cuh @@ -50,10 +50,10 @@ namespace detail struct measure_cupti_base { explicit measure_cupti_base(nvbench::state &exec_state); - measure_cupti_base(const measure_cupti_base &) = delete; - measure_cupti_base(measure_cupti_base &&) = delete; + measure_cupti_base(const measure_cupti_base &) = delete; + measure_cupti_base(measure_cupti_base &&) = delete; measure_cupti_base &operator=(const measure_cupti_base &) = delete; - measure_cupti_base &operator=(measure_cupti_base &&) = delete; + measure_cupti_base &operator=(measure_cupti_base &&) = delete; protected: struct kernel_launch_timer; @@ -61,10 +61,7 @@ protected: void check(); void generate_summaries(); - __forceinline__ void flush_device_l2() - { - m_l2flush.flush(m_launch.get_stream()); - } + __forceinline__ void flush_device_l2() { m_l2flush.flush(m_launch.get_stream()); } __forceinline__ void sync_stream() const { diff --git a/nvbench/detail/measure_hot.cu b/nvbench/detail/measure_hot.cu index 25e2119f..94971229 100644 --- a/nvbench/detail/measure_hot.cu +++ b/nvbench/detail/measure_hot.cu @@ -47,9 +47,8 @@ measure_hot_base::measure_hot_base(state &exec_state) // to match the cold result if available. try { - nvbench::int64_t cold_samples = - m_state.get_summary("nv/cold/sample_size").get_int64("value"); - m_min_samples = std::max(m_min_samples, cold_samples); + nvbench::int64_t cold_samples = m_state.get_summary("nv/cold/sample_size").get_int64("value"); + m_min_samples = std::max(m_min_samples, cold_samples); // If the cold measurement ran successfully, disable skip_time. It'd just // be annoying to skip now. @@ -72,15 +71,11 @@ void measure_hot_base::check() const auto device = m_state.get_device(); if (!device) { - NVBENCH_THROW(std::runtime_error, - "{}", - "Device required for `hot` measurement."); + NVBENCH_THROW(std::runtime_error, "{}", "Device required for `hot` measurement."); } if (!device->is_active()) { // This means something went wrong higher up. Throw an error. - NVBENCH_THROW(std::runtime_error, - "{}", - "Internal error: Current device is not active."); + NVBENCH_THROW(std::runtime_error, "{}", "Internal error: Current device is not active."); } } @@ -116,8 +111,7 @@ void measure_hot_base::generate_summaries() } // Log if a printer exists: - if (auto printer_opt_ref = m_state.get_benchmark().get_printer(); - printer_opt_ref.has_value()) + if (auto printer_opt_ref = m_state.get_benchmark().get_printer(); printer_opt_ref.has_value()) { auto &printer = printer_opt_ref.value().get(); diff --git a/nvbench/detail/measure_hot.cuh b/nvbench/detail/measure_hot.cuh index 9c4c2ecf..c9175830 100644 --- a/nvbench/detail/measure_hot.cuh +++ b/nvbench/detail/measure_hot.cuh @@ -27,7 +27,7 @@ #include -#include +#include namespace nvbench { @@ -41,10 +41,10 @@ namespace detail struct measure_hot_base { explicit measure_hot_base(nvbench::state &exec_state); - measure_hot_base(const measure_hot_base &) = delete; - measure_hot_base(measure_hot_base &&) = delete; + measure_hot_base(const measure_hot_base &) = delete; + measure_hot_base(measure_hot_base &&) = delete; measure_hot_base &operator=(const measure_hot_base &) = delete; - measure_hot_base &operator=(measure_hot_base &&) = delete; + measure_hot_base &operator=(measure_hot_base &&) = delete; protected: void check(); @@ -131,7 +131,7 @@ private: // The .95 factor here pads the batch_size a bit to avoid needing a second // batch due to noise. const auto time_estimate = m_cuda_timer.get_duration() * 0.95; - auto batch_size = static_cast(m_min_time / time_estimate); + auto batch_size = static_cast(m_min_time / time_estimate); do { @@ -142,7 +142,7 @@ private: // Block stream until some work is queued. // Limit the number of kernel executions while blocked to prevent // deadlocks. See warnings on blocking_kernel. - const auto blocked_launches = std::min(batch_size, nvbench::int64_t{2}); + const auto blocked_launches = std::min(batch_size, nvbench::int64_t{2}); const auto unblocked_launches = batch_size - blocked_launches; this->block_stream(); @@ -189,7 +189,6 @@ private: break; // Stop iterating } - m_walltime_timer.stop(); if (m_walltime_timer.get_duration() > m_timeout) { diff --git a/nvbench/detail/ring_buffer.cuh b/nvbench/detail/ring_buffer.cuh index fa862004..5c00b24a 100644 --- a/nvbench/detail/ring_buffer.cuh +++ b/nvbench/detail/ring_buffer.cuh @@ -22,18 +22,112 @@ #include +#include +#include #include #include namespace nvbench::detail { +template +class ring_buffer_iterator +{ + std::ptrdiff_t m_index; + std::ptrdiff_t m_capacity; + T *m_ptr; + +public: + using iterator_category = std::random_access_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = T *; + using reference = T &; + + ring_buffer_iterator(std::ptrdiff_t index, std::ptrdiff_t capacity, pointer ptr) + : m_index{index} + , m_capacity{capacity} + , m_ptr{ptr} + {} + + ring_buffer_iterator operator++() + { + ++m_index; + return *this; + } + + ring_buffer_iterator operator++(int) + { + ring_buffer_iterator temp = *this; + ++(*this); + return temp; + } + + ring_buffer_iterator &operator--() + { + --m_index; + return *this; + } + + ring_buffer_iterator operator--(int) + { + ring_buffer_iterator temp = *this; + --(*this); + return temp; + } + + ring_buffer_iterator operator+(difference_type n) const + { + return ring_buffer_iterator(m_index + n, m_capacity, m_ptr); + } + + ring_buffer_iterator operator-(difference_type n) const + { + return ring_buffer_iterator(m_index - n, m_capacity, m_ptr); + } + + difference_type operator-(const ring_buffer_iterator &other) const + { + return m_index - other.m_index; + } + + reference operator*() const { return m_ptr[m_index % m_capacity]; } + pointer operator->() const { return &(operator*()); } + + reference operator[](difference_type n) const { return *(*this + n); } + + bool operator==(const ring_buffer_iterator &other) const + { + return m_ptr == other.m_ptr && m_index == other.m_index; + } + bool operator!=(const ring_buffer_iterator &other) const { return !(*this == other); } + bool operator<(const ring_buffer_iterator &other) const { return m_index < other.m_index; } + bool operator>(const ring_buffer_iterator &other) const { return m_index > other.m_index; } + bool operator<=(const ring_buffer_iterator &other) const { return !(*this > other); } + bool operator>=(const ring_buffer_iterator &other) const { return !(*this < other); } +}; + /** * @brief A simple, dynamically sized ring buffer. */ template struct ring_buffer { +private: + using buffer_t = typename std::vector; + using diff_t = typename buffer_t::difference_type; + + buffer_t m_buffer; + std::size_t m_index{0}; + bool m_full{false}; + + std::size_t get_front_index() const + { + return m_full ? m_index : 0; + } + +public: + /** * Create a new ring buffer with the requested capacity. */ @@ -42,34 +136,59 @@ struct ring_buffer {} /** - * Iterators provide all values in the ring buffer in unspecified order. + * Iterators provide all values in the ring buffer in FIFO order. * @{ */ - // clang-format off - [[nodiscard]] auto begin() { return m_buffer.begin(); } - [[nodiscard]] auto begin() const { return m_buffer.begin(); } - [[nodiscard]] auto cbegin() const { return m_buffer.cbegin(); } - [[nodiscard]] auto end() { return m_buffer.begin() + this->size(); } - [[nodiscard]] auto end() const { return m_buffer.begin() + this->size(); } - [[nodiscard]] auto cend() const { return m_buffer.cbegin() + this->size(); } - // clang-format on + [[nodiscard]] ring_buffer_iterator begin() + { + return {static_cast(get_front_index()), + static_cast(capacity()), + m_buffer.data()}; + } + + [[nodiscard]] ring_buffer_iterator end() + { + return {static_cast(get_front_index() + size()), + static_cast(capacity()), + m_buffer.data()}; + } + [[nodiscard]] ring_buffer_iterator begin() const + { + return {static_cast(get_front_index()), + static_cast(capacity()), + m_buffer.data()}; + } + + [[nodiscard]] ring_buffer_iterator end() const + { + return {static_cast(get_front_index() + size()), + static_cast(capacity()), + m_buffer.data()}; + } + [[nodiscard]] ring_buffer_iterator cbegin() const + { + return {static_cast(get_front_index()), + static_cast(capacity()), + m_buffer.data()}; + } + + [[nodiscard]] ring_buffer_iterator cend() const + { + return {static_cast(get_front_index() + size()), + static_cast(capacity()), + m_buffer.data()}; + } /** @} */ /** * The number of valid values in the ring buffer. Always <= capacity(). */ - [[nodiscard]] std::size_t size() const - { - return m_full ? m_buffer.size() : m_index; - } + [[nodiscard]] std::size_t size() const { return m_full ? m_buffer.size() : m_index; } /** * The maximum size of the ring buffer. */ - [[nodiscard]] std::size_t capacity() const - { - return m_buffer.size(); - } + [[nodiscard]] std::size_t capacity() const { return m_buffer.size(); } /** * @return True if the ring buffer is empty. @@ -119,11 +238,6 @@ struct ring_buffer return m_buffer[back_index]; } /**@}*/ - -private: - std::vector m_buffer; - std::size_t m_index{0}; - bool m_full{false}; }; } // namespace nvbench::detail diff --git a/nvbench/detail/state_exec.cuh b/nvbench/detail/state_exec.cuh index 9352a5ff..bab2daf1 100644 --- a/nvbench/detail/state_exec.cuh +++ b/nvbench/detail/state_exec.cuh @@ -53,7 +53,7 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher) "`ExecTags` argument must be a member (or combination of " "members) from nvbench::exec_tag."); - constexpr auto measure_tags = tags & measure_mask; + constexpr auto measure_tags = tags & measure_mask; constexpr auto modifier_tags = tags & modifier_mask; // "run once" is handled by the cold measurement: @@ -81,8 +81,7 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher) } else { - this->exec(cold | hot | tags, - std::forward(kernel_launcher)); + this->exec(cold | hot | tags, std::forward(kernel_launcher)); } return; } @@ -99,8 +98,8 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher) constexpr bool use_blocking_kernel = !(tags & no_block); if constexpr (tags & timer) { - // Estimate bandwidth here - #ifdef NVBENCH_HAS_CUPTI +// Estimate bandwidth here +#ifdef NVBENCH_HAS_CUPTI if constexpr (!(modifier_tags & run_once)) { if (this->is_cupti_required()) @@ -110,7 +109,7 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher) measure(); } } - #endif +#endif using measure_t = nvbench::detail::measure_cold; measure_t measure{*this, kernel_launcher}; @@ -121,8 +120,8 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher) using wrapper_t = nvbench::detail::kernel_launch_timer_wrapper; wrapper_t wrapper{kernel_launcher}; - // Estimate bandwidth here - #ifdef NVBENCH_HAS_CUPTI +// Estimate bandwidth here +#ifdef NVBENCH_HAS_CUPTI if constexpr (!(modifier_tags & run_once)) { if (this->is_cupti_required()) @@ -132,10 +131,9 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher) measure(); } } - #endif +#endif - using measure_t = - nvbench::detail::measure_cold; + using measure_t = nvbench::detail::measure_cold; measure_t measure(*this, wrapper); measure(); } @@ -143,12 +141,10 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher) if constexpr (tags & hot) { - static_assert(!(tags & sync), - "Hot measurement doesn't support the `sync` exec_tag."); - static_assert(!(tags & timer), - "Hot measurement doesn't support the `timer` exec_tag."); + static_assert(!(tags & sync), "Hot measurement doesn't support the `sync` exec_tag."); + static_assert(!(tags & timer), "Hot measurement doesn't support the `timer` exec_tag."); constexpr bool use_blocking_kernel = !(tags & no_block); - using measure_t = nvbench::detail::measure_hot; + using measure_t = nvbench::detail::measure_hot; measure_t measure{*this, kernel_launcher}; measure(); } diff --git a/nvbench/detail/state_generator.cxx b/nvbench/detail/state_generator.cxx index 8c153bff..26a897a4 100644 --- a/nvbench/detail/state_generator.cxx +++ b/nvbench/detail/state_generator.cxx @@ -40,9 +40,7 @@ void state_iterator::add_axis(const nvbench::axis_base &axis) this->add_axis(axis.get_name(), axis.get_type(), axis.get_size()); } -void state_iterator::add_axis(std::string axis, - nvbench::axis_type type, - std::size_t size) +void state_iterator::add_axis(std::string axis, nvbench::axis_type type, std::size_t size) { m_indices.push_back({std::move(axis), type, std::size_t{0}, size}); } @@ -74,10 +72,7 @@ state_iterator::get_current_indices() const return m_indices; } -[[nodiscard]] bool state_iterator::iter_valid() const -{ - return m_current < m_total; -} +[[nodiscard]] bool state_iterator::iter_valid() const { return m_current < m_total; } void state_iterator::next() { @@ -102,7 +97,7 @@ state_generator::state_generator(const benchmark_base &bench) void state_generator::build_axis_configs() { - const axes_metadata &axes = m_benchmark.get_axes(); + const axes_metadata &axes = m_benchmark.get_axes(); const std::vector> &axes_vec = axes.get_axes(); // Construct two state_generators: @@ -118,35 +113,29 @@ void state_generator::build_axis_configs() type_axes.reserve(axes_vec.size()); // Filter all axes by into type and non-type: - std::for_each(axes_vec.cbegin(), - axes_vec.cend(), - [&non_type_si, &type_axes](const auto &axis) { - if (axis->get_type() == nvbench::axis_type::type) - { - type_axes.push_back( - std::cref(static_cast(*axis))); - } - else - { - non_type_si.add_axis(*axis); - } - }); + std::for_each(axes_vec.cbegin(), axes_vec.cend(), [&non_type_si, &type_axes](const auto &axis) { + if (axis->get_type() == nvbench::axis_type::type) + { + type_axes.push_back(std::cref(static_cast(*axis))); + } + else + { + non_type_si.add_axis(*axis); + } + }); // Reverse sort type axes by index. This way the state_generator's cartesian // product of the type axes values will be enumerated in the same order as // nvbench::tl::cartesian_product. This is necessary to ensure // that the correct states are passed to the corresponding benchmark // instantiations. - std::sort(type_axes.begin(), - type_axes.end(), - [](const auto &axis_1, const auto &axis_2) { - return axis_1.get().get_axis_index() > - axis_2.get().get_axis_index(); - }); - - std::for_each(type_axes.cbegin(), - type_axes.cend(), - [&type_si](const auto &axis) { type_si.add_axis(axis); }); + std::sort(type_axes.begin(), type_axes.end(), [](const auto &axis_1, const auto &axis_2) { + return axis_1.get().get_axis_index() > axis_2.get().get_axis_index(); + }); + + std::for_each(type_axes.cbegin(), type_axes.cend(), [&type_si](const auto &axis) { + type_si.add_axis(axis); + }); } // type_axis_configs generation: @@ -157,8 +146,8 @@ void state_generator::build_axis_configs() // Build type_axis_configs for (type_si.init(); type_si.iter_valid(); type_si.next()) { - auto &[config, active_mask] = m_type_axis_configs.emplace_back( - std::make_pair(nvbench::named_values{}, true)); + auto &[config, active_mask] = + m_type_axis_configs.emplace_back(std::make_pair(nvbench::named_values{}, true)); // Reverse the indices so they're once again in the same order as // specified: @@ -173,8 +162,7 @@ void state_generator::build_axis_configs() active_mask = false; } - config.set_string(axis_info.axis, - axis.get_input_string(axis_info.index)); + config.set_string(axis_info.axis, axis.get_input_string(axis_info.index)); } } // type_si } // type_axis_config generation @@ -199,21 +187,18 @@ void state_generator::build_axis_configs() break; case axis_type::int64: - config.set_int64( - axis_info.axis, - axes.get_int64_axis(axis_info.axis).get_value(axis_info.index)); + config.set_int64(axis_info.axis, + axes.get_int64_axis(axis_info.axis).get_value(axis_info.index)); break; case axis_type::float64: - config.set_float64( - axis_info.axis, - axes.get_float64_axis(axis_info.axis).get_value(axis_info.index)); + config.set_float64(axis_info.axis, + axes.get_float64_axis(axis_info.axis).get_value(axis_info.index)); break; case axis_type::string: - config.set_string( - axis_info.axis, - axes.get_string_axis(axis_info.axis).get_value(axis_info.index)); + config.set_string(axis_info.axis, + axes.get_string_axis(axis_info.axis).get_value(axis_info.index)); break; } // switch (type) } // for (axis_info : current_indices) @@ -239,15 +224,12 @@ void state_generator::build_states() } } -void state_generator::add_states_for_device( - const std::optional &device) +void state_generator::add_states_for_device(const std::optional &device) { const auto num_type_configs = m_type_axis_configs.size(); - for (std::size_t type_config_index = 0; type_config_index < num_type_configs; - ++type_config_index) + for (std::size_t type_config_index = 0; type_config_index < num_type_configs; ++type_config_index) { - const auto &[type_config, - axis_mask] = m_type_axis_configs[type_config_index]; + const auto &[type_config, axis_mask] = m_type_axis_configs[type_config_index]; if (!axis_mask) { // Don't generate inner vector if the type config is masked out. @@ -261,10 +243,7 @@ void state_generator::add_states_for_device( config.append(non_type_config); // Create benchmark: - m_states.push_back(nvbench::state{m_benchmark, - std::move(config), - device, - type_config_index}); + m_states.push_back(nvbench::state{m_benchmark, std::move(config), device, type_config_index}); } } } diff --git a/nvbench/detail/statistics.cuh b/nvbench/detail/statistics.cuh index 957bca4c..522b4f21 100644 --- a/nvbench/detail/statistics.cuh +++ b/nvbench/detail/statistics.cuh @@ -18,16 +18,22 @@ #pragma once -#include - #include +#include #include #include #include #include +#include +#include + #include +#ifndef M_PI + #define M_PI 3.14159265358979323846 +#endif + namespace nvbench::detail::statistics { @@ -36,13 +42,13 @@ namespace nvbench::detail::statistics * * If the input has fewer than 5 sample, infinity is returned. */ -template ::value_type> +template ::value_type> ValueType standard_deviation(Iter first, Iter last, ValueType mean) { static_assert(std::is_floating_point_v); - const auto num = last - first; + const auto num = std::distance(first, last); + if (num < 5) // don't bother with low sample sizes. { return std::numeric_limits::infinity(); @@ -57,8 +63,135 @@ ValueType standard_deviation(Iter first, Iter last, ValueType mean) val *= val; return val; }) / - static_cast((num - 1)); + static_cast((num - 1)); // Bessel’s correction return std::sqrt(variance); } +/** + * Computes and returns the mean. + * + * If the input has fewer than 1 sample, infinity is returned. + */ +template +nvbench::float64_t compute_mean(It first, It last) +{ + const auto num = std::distance(first, last); + + if (num < 1) + { + return std::numeric_limits::infinity(); + } + + return std::accumulate(first, last, 0.0) / static_cast(num); +} + +/** + * Computes linear regression and returns the slope and intercept + * + * This version takes precomputed mean of [first, last). + * If the input has fewer than 2 samples, infinity is returned for both slope and intercept. + */ +template +std::pair +compute_linear_regression(It first, It last, nvbench::float64_t mean_y) +{ + const std::size_t n = static_cast(std::distance(first, last)); + + if (n < 2) + { + return std::make_pair(std::numeric_limits::infinity(), + std::numeric_limits::infinity()); + } + + // Assuming x starts from 0 + const nvbench::float64_t mean_x = (static_cast(n) - 1.0) / 2.0; + + // Calculate the numerator and denominator for the slope + nvbench::float64_t numerator = 0.0; + nvbench::float64_t denominator = 0.0; + + for (std::size_t i = 0; i < n; ++i, ++first) + { + const nvbench::float64_t x_diff = static_cast(i) - mean_x; + numerator += x_diff * (*first - mean_y); + denominator += x_diff * x_diff; + } + + // Calculate the slope and intercept + const nvbench::float64_t slope = numerator / denominator; + const nvbench::float64_t intercept = mean_y - slope * mean_x; + + return std::make_pair(slope, intercept); +} + +/** + * Computes linear regression and returns the slope and intercept + * + * If the input has fewer than 2 samples, infinity is returned for both slope and intercept. + */ +template +std::pair compute_linear_regression(It first, It last) +{ + return compute_linear_regression(first, last, compute_mean(first, last)); +} + +/** + * Computes and returns the R^2 (coefficient of determination) + * + * This version takes precomputed mean of [first, last). + */ +template +nvbench::float64_t compute_r2(It first, + It last, + nvbench::float64_t mean_y, + nvbench::float64_t slope, + nvbench::float64_t intercept) +{ + const std::size_t n = static_cast(std::distance(first, last)); + + nvbench::float64_t ss_tot = 0.0; + nvbench::float64_t ss_res = 0.0; + + for (std::size_t i = 0; i < n; ++i, ++first) + { + const nvbench::float64_t y = *first; + const nvbench::float64_t y_pred = slope * static_cast(i) + intercept; + + ss_tot += (y - mean_y) * (y - mean_y); + ss_res += (y - y_pred) * (y - y_pred); + } + + if (ss_tot == 0.0) + { + return 1.0; + } + + return 1.0 - ss_res / ss_tot; +} + +/** + * Computes and returns the R^2 (coefficient of determination) + */ +template +nvbench::float64_t +compute_r2(It first, It last, nvbench::float64_t slope, nvbench::float64_t intercept) +{ + return compute_r2(first, last, compute_mean(first, last), slope, intercept); +} + +inline nvbench::float64_t rad2deg(nvbench::float64_t rad) +{ + return rad * 180.0 / M_PI; +} + +inline nvbench::float64_t slope2rad(nvbench::float64_t slope) +{ + return std::atan2(slope, 1.0); +} + +inline nvbench::float64_t slope2deg(nvbench::float64_t slope) +{ + return rad2deg(slope2rad(slope)); +} + } // namespace nvbench::detail::statistics diff --git a/nvbench/detail/stdrel_criterion.cuh b/nvbench/detail/stdrel_criterion.cuh new file mode 100644 index 00000000..5f87e842 --- /dev/null +++ b/nvbench/detail/stdrel_criterion.cuh @@ -0,0 +1,47 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include + +namespace nvbench::detail +{ + +class stdrel_criterion final : public stopping_criterion_base +{ + // state + nvbench::int64_t m_total_samples{}; + nvbench::float64_t m_total_cuda_time{}; + std::vector m_cuda_times{}; + nvbench::detail::ring_buffer m_noise_tracker{512}; + +public: + stdrel_criterion(); + +protected: + virtual void do_initialize() override; + virtual void do_add_measurement(nvbench::float64_t measurement) override; + virtual bool do_is_finished() override; +}; + +} // namespace nvbench::detail diff --git a/nvbench/detail/stdrel_criterion.cxx b/nvbench/detail/stdrel_criterion.cxx new file mode 100644 index 00000000..a6c5ea8a --- /dev/null +++ b/nvbench/detail/stdrel_criterion.cxx @@ -0,0 +1,98 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +namespace nvbench::detail +{ + +stdrel_criterion::stdrel_criterion() + : stopping_criterion_base{"stdrel", + {{"max-noise", nvbench::detail::compat_max_noise()}, + {"min-time", nvbench::detail::compat_min_time()}}} +{} + +void stdrel_criterion::do_initialize() +{ + m_total_samples = 0; + m_total_cuda_time = 0.0; + m_cuda_times.clear(); + m_noise_tracker.clear(); +} + +void stdrel_criterion::do_add_measurement(nvbench::float64_t measurement) +{ + m_total_samples++; + m_total_cuda_time += measurement; + m_cuda_times.push_back(measurement); + + // Compute convergence statistics using CUDA timings: + const auto mean_cuda_time = m_total_cuda_time / static_cast(m_total_samples); + const auto cuda_stdev = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(), + m_cuda_times.cend(), + mean_cuda_time); + const auto cuda_rel_stdev = cuda_stdev / mean_cuda_time; + if (std::isfinite(cuda_rel_stdev)) + { + m_noise_tracker.push_back(cuda_rel_stdev); + } +} + +bool stdrel_criterion::do_is_finished() +{ + if (m_total_cuda_time <= m_params.get_float64("min-time")) + { + return false; + } + + // Noise has dropped below threshold + if (m_noise_tracker.back() < m_params.get_float64("max-noise")) + { + return true; + } + + // Check if the noise (cuda rel stdev) has converged by inspecting a + // trailing window of recorded noise measurements. + // This helps identify benchmarks that are inherently noisy and would + // never converge to the target stdev threshold. This check ensures that the + // benchmark will end if the stdev stabilizes above the target threshold. + // Gather some iterations before checking noise, and limit how often we + // check this. + if (m_noise_tracker.size() > 64 && (m_total_samples % 16 == 0)) + { + // Use the current noise as the stdev reference. + const auto current_noise = m_noise_tracker.back(); + const auto noise_stdev = + nvbench::detail::statistics::standard_deviation(m_noise_tracker.cbegin(), + m_noise_tracker.cend(), + current_noise); + const auto noise_rel_stdev = noise_stdev / current_noise; + + // If the rel stdev of the last N cuda noise measurements is less than + // 5%, consider the result stable. + const auto noise_threshold = 0.05; + if (noise_rel_stdev < noise_threshold) + { + return true; + } + } + + return false; +} + +} // namespace nvbench::detail diff --git a/nvbench/detail/throw.cuh b/nvbench/detail/throw.cuh index ffbe5bb9..e3bb9fd5 100644 --- a/nvbench/detail/throw.cuh +++ b/nvbench/detail/throw.cuh @@ -21,17 +21,15 @@ #include #include -#define NVBENCH_THROW(exception_type, format_str, ...) \ - throw exception_type(fmt::format("{}:{}: {}", \ - __FILE__, \ - __LINE__, \ - fmt::format(format_str, __VA_ARGS__))) +#define NVBENCH_THROW(exception_type, format_str, ...) \ + throw exception_type( \ + fmt::format("{}:{}: {}", __FILE__, __LINE__, fmt::format(format_str, __VA_ARGS__))) -#define NVBENCH_THROW_IF(condition, exception_type, format_str, ...) \ - do \ - { \ - if (condition) \ - { \ - NVBENCH_THROW(exception_type, format_str, __VA_ARGS__); \ - } \ +#define NVBENCH_THROW_IF(condition, exception_type, format_str, ...) \ + do \ + { \ + if (condition) \ + { \ + NVBENCH_THROW(exception_type, format_str, __VA_ARGS__); \ + } \ } while (false) diff --git a/nvbench/detail/transform_reduce.cuh b/nvbench/detail/transform_reduce.cuh index 8bc5db68..56253587 100644 --- a/nvbench/detail/transform_reduce.cuh +++ b/nvbench/detail/transform_reduce.cuh @@ -27,10 +27,7 @@ namespace nvbench::detail { -template +template InitValueT transform_reduce(InIterT first, InIterT last, InitValueT init, diff --git a/nvbench/detail/type_list_impl.cuh b/nvbench/detail/type_list_impl.cuh index d2e498cd..e97aaaa1 100644 --- a/nvbench/detail/type_list_impl.cuh +++ b/nvbench/detail/type_list_impl.cuh @@ -20,12 +20,10 @@ namespace tl::detail { template -auto size(nvbench::type_list) - -> std::integral_constant; +auto size(nvbench::type_list) -> std::integral_constant; -template -auto get(nvbench::type_list) - -> std::tuple_element_t>; +template +auto get(nvbench::type_list) -> std::tuple_element_t>; template auto concat(nvbench::type_list, nvbench::type_list) @@ -44,9 +42,8 @@ struct prepend_each> template struct prepend_each> { - using cur = decltype(detail::concat(nvbench::type_list{}, TL{})); - using next = - typename detail::prepend_each>::type; + using cur = decltype(detail::concat(nvbench::type_list{}, TL{})); + using next = typename detail::prepend_each>::type; using type = decltype(detail::concat(nvbench::type_list{}, next{})); }; @@ -71,23 +68,20 @@ struct cartesian_product, TLTail...>> template struct cartesian_product>> { - using cur = nvbench::type_list>; - using next = - std::conditional_t>>::type, - nvbench::type_list<>>; + using cur = nvbench::type_list>; + using next = std::conditional_t< + sizeof...(Ts) != 0, + typename detail::cartesian_product>>::type, + nvbench::type_list<>>; using type = decltype(detail::concat(cur{}, next{})); }; template -struct cartesian_product< - nvbench::type_list, TL, TLTail...>> +struct cartesian_product, TL, TLTail...>> { - using tail_prod = - typename detail::cartesian_product>::type; - using cur = typename detail::prepend_each::type; - using next = typename detail::cartesian_product< + using tail_prod = typename detail::cartesian_product>::type; + using cur = typename detail::prepend_each::type; + using next = typename detail::cartesian_product< nvbench::type_list, TL, TLTail...>>::type; using type = decltype(detail::concat(cur{}, next{})); }; diff --git a/nvbench/device_info.cu b/nvbench/device_info.cu index 02c6b973..3b26cdbc 100644 --- a/nvbench/device_info.cu +++ b/nvbench/device_info.cu @@ -45,6 +45,9 @@ device_info::device_info(int id) , m_nvml_device(nullptr) { NVBENCH_CUDA_CALL(cudaGetDeviceProperties(&m_prop, m_id)); + // NVML's lifetime should extend for the entirety of the process, so store in a + // global. + [[maybe_unused]] static auto nvml_lifetime = nvbench::nvml::NVMLLifetimeManager(); #ifdef NVBENCH_HAS_NVML // Retrieve the current device's pci_id as a null-terminated string. @@ -65,17 +68,15 @@ void device_info::set_persistence_mode(bool state) #else // NVBENCH_HAS_NVML try { - NVBENCH_NVML_CALL(nvmlDeviceSetPersistenceMode( - m_nvml_device, - state ? NVML_FEATURE_ENABLED : NVML_FEATURE_DISABLED)); + NVBENCH_NVML_CALL( + nvmlDeviceSetPersistenceMode(m_nvml_device, + state ? NVML_FEATURE_ENABLED : NVML_FEATURE_DISABLED)); } catch (nvml::call_failed &e) { if (e.get_error_code() == NVML_ERROR_NOT_SUPPORTED) { - NVBENCH_THROW(std::runtime_error, - "{}", - "Persistence mode is only supported on Linux."); + NVBENCH_THROW(std::runtime_error, "{}", "Persistence mode is only supported on Linux."); } else if (e.get_error_code() == NVML_ERROR_NO_PERMISSION) { @@ -104,30 +105,26 @@ try break; case clock_rate::base: - NVBENCH_NVML_CALL(nvmlDeviceSetGpuLockedClocks( - m_nvml_device, - static_cast(NVML_CLOCK_LIMIT_ID_TDP), - static_cast(NVML_CLOCK_LIMIT_ID_TDP))); + NVBENCH_NVML_CALL( + nvmlDeviceSetGpuLockedClocks(m_nvml_device, + static_cast(NVML_CLOCK_LIMIT_ID_TDP), + static_cast(NVML_CLOCK_LIMIT_ID_TDP))); break; case clock_rate::maximum: { - const auto max_mhz = static_cast( - this->get_sm_default_clock_rate() / (1000 * 1000)); - NVBENCH_NVML_CALL( - nvmlDeviceSetGpuLockedClocks(m_nvml_device, max_mhz, max_mhz)); + const auto max_mhz = + static_cast(this->get_sm_default_clock_rate() / (1000 * 1000)); + NVBENCH_NVML_CALL(nvmlDeviceSetGpuLockedClocks(m_nvml_device, max_mhz, max_mhz)); break; } default: - NVBENCH_THROW(std::runtime_error, - "Unrecognized clock rate: {}", - static_cast(rate)); + NVBENCH_THROW(std::runtime_error, "Unrecognized clock rate: {}", static_cast(rate)); } } catch (nvml::call_failed &e) { - if (e.get_error_code() == NVML_ERROR_NOT_SUPPORTED && - this->get_sm_version() < 700) + if (e.get_error_code() == NVML_ERROR_NOT_SUPPORTED && this->get_sm_version() < 700) { NVBENCH_THROW(std::runtime_error, "GPU clock rates can only be modified for Volta and later. " @@ -156,9 +153,7 @@ catch (nvml::call_failed &e) { if (!is_active()) { - NVBENCH_THROW(std::runtime_error, - "{}", - "get_context is called for inactive device"); + NVBENCH_THROW(std::runtime_error, "{}", "get_context is called for inactive device"); } CUcontext cu_context; diff --git a/nvbench/device_info.cuh b/nvbench/device_info.cuh index 296a2c2b..98184cf9 100644 --- a/nvbench/device_info.cuh +++ b/nvbench/device_info.cuh @@ -54,10 +54,7 @@ struct device_info [[nodiscard]] int get_id() const { return m_id; } /// @return The name of the device. - [[nodiscard]] std::string_view get_name() const - { - return std::string_view(m_prop.name); - } + [[nodiscard]] std::string_view get_name() const { return std::string_view(m_prop.name); } [[nodiscard]] bool is_active() const { @@ -83,7 +80,6 @@ struct device_info /// @note Requires root / admin privileges. void set_persistence_mode(bool state); - /// Symbolic values for special clock rates enum class clock_rate { @@ -101,10 +97,7 @@ struct device_info void lock_gpu_clocks(clock_rate rate); /// @return The SM version of the current device as (major*100) + (minor*10). - [[nodiscard]] int get_sm_version() const - { - return m_prop.major * 100 + m_prop.minor * 10; - } + [[nodiscard]] int get_sm_version() const { return m_prop.major * 100 + m_prop.minor * 10; } /// @return The PTX version of the current device, e.g. sm_80 returns 800. [[nodiscard]] __forceinline__ int get_ptx_version() const @@ -119,46 +112,25 @@ struct device_info } /// @return The number of physical streaming multiprocessors on this device. - [[nodiscard]] int get_number_of_sms() const - { - return m_prop.multiProcessorCount; - } + [[nodiscard]] int get_number_of_sms() const { return m_prop.multiProcessorCount; } /// @return The maximum number of resident blocks per SM. - [[nodiscard]] int get_max_blocks_per_sm() const - { - return m_prop.maxBlocksPerMultiProcessor; - } + [[nodiscard]] int get_max_blocks_per_sm() const { return m_prop.maxBlocksPerMultiProcessor; } /// @return The maximum number of resident threads per SM. - [[nodiscard]] int get_max_threads_per_sm() const - { - return m_prop.maxThreadsPerMultiProcessor; - } + [[nodiscard]] int get_max_threads_per_sm() const { return m_prop.maxThreadsPerMultiProcessor; } /// @return The maximum number of threads per block. - [[nodiscard]] int get_max_threads_per_block() const - { - return m_prop.maxThreadsPerBlock; - } + [[nodiscard]] int get_max_threads_per_block() const { return m_prop.maxThreadsPerBlock; } /// @return The number of registers per SM. - [[nodiscard]] int get_registers_per_sm() const - { - return m_prop.regsPerMultiprocessor; - } + [[nodiscard]] int get_registers_per_sm() const { return m_prop.regsPerMultiprocessor; } /// @return The number of registers per block. - [[nodiscard]] int get_registers_per_block() const - { - return m_prop.regsPerBlock; - } + [[nodiscard]] int get_registers_per_block() const { return m_prop.regsPerBlock; } /// @return The total number of bytes available in global memory. - [[nodiscard]] std::size_t get_global_memory_size() const - { - return m_prop.totalGlobalMem; - } + [[nodiscard]] std::size_t get_global_memory_size() const { return m_prop.totalGlobalMem; } struct memory_info { @@ -176,16 +148,13 @@ struct device_info } /// @return The width of the global memory bus in bits. - [[nodiscard]] int get_global_memory_bus_width() const - { - return m_prop.memoryBusWidth; - } + [[nodiscard]] int get_global_memory_bus_width() const { return m_prop.memoryBusWidth; } //// @return The global memory bus bandwidth in bytes/sec. [[nodiscard]] std::size_t get_global_memory_bus_bandwidth() const { // 2 is for DDR, CHAR_BITS to convert bus_width to bytes. return 2 * this->get_global_memory_bus_peak_clock_rate() * - (this->get_global_memory_bus_width() / CHAR_BIT); + static_cast(this->get_global_memory_bus_width() / CHAR_BIT); } /// @return The size of the L2 cache in bytes. @@ -201,10 +170,7 @@ struct device_info } /// @return The available amount of shared memory in bytes per block. - [[nodiscard]] std::size_t get_shared_memory_per_block() const - { - return m_prop.sharedMemPerBlock; - } + [[nodiscard]] std::size_t get_shared_memory_per_block() const { return m_prop.sharedMemPerBlock; } /// @return True if ECC is enabled on this device. [[nodiscard]] bool get_ecc_state() const { return m_prop.ECCEnabled; } @@ -224,23 +190,11 @@ struct device_info #endif /// @return A cached copy of the device's cudaDeviceProp. - [[nodiscard]] const cudaDeviceProp &get_cuda_device_prop() const - { - return m_prop; - } + [[nodiscard]] const cudaDeviceProp &get_cuda_device_prop() const { return m_prop; } - [[nodiscard]] bool operator<(const device_info &o) const - { - return m_id < o.m_id; - } - [[nodiscard]] bool operator==(const device_info &o) const - { - return m_id == o.m_id; - } - [[nodiscard]] bool operator!=(const device_info &o) const - { - return m_id != o.m_id; - } + [[nodiscard]] bool operator<(const device_info &o) const { return m_id < o.m_id; } + [[nodiscard]] bool operator==(const device_info &o) const { return m_id == o.m_id; } + [[nodiscard]] bool operator!=(const device_info &o) const { return m_id != o.m_id; } private: int m_id; @@ -267,11 +221,10 @@ try { nvbench::detail::device_scope _{dev_id}; cudaFuncAttributes attr{}; - NVBENCH_CUDA_CALL( - cudaFuncGetAttributes(&attr, ((const void*)nvbench::detail::noop_kernel_ptr) )); + NVBENCH_CUDA_CALL(cudaFuncGetAttributes(&attr, ((const void *)nvbench::detail::noop_kernel_ptr))); return attr.ptxVersion * 10; } -catch(...) +catch (...) { // Fail gracefully when no appropriate PTX is found for this device. return -1; } diff --git a/nvbench/device_manager.cu b/nvbench/device_manager.cu index 136b20a3..a70a18c4 100644 --- a/nvbench/device_manager.cu +++ b/nvbench/device_manager.cu @@ -18,10 +18,11 @@ #include +#include + #include #include - -#include +#include namespace nvbench { @@ -44,4 +45,13 @@ device_manager::device_manager() } } +const nvbench::device_info &device_manager::get_device(int id) +{ + if (id < 0) + { + NVBENCH_THROW(std::runtime_error, "Negative index: {}.", id); + } + return m_devices.at(static_cast(id)); +} + } // namespace nvbench diff --git a/nvbench/device_manager.cuh b/nvbench/device_manager.cuh index 94907755..36082b81 100644 --- a/nvbench/device_manager.cuh +++ b/nvbench/device_manager.cuh @@ -40,10 +40,7 @@ struct device_manager /** * @return The total number of detected CUDA devices. */ - [[nodiscard]] int get_number_of_devices() const - { - return static_cast(m_devices.size()); - } + [[nodiscard]] int get_number_of_devices() const { return static_cast(m_devices.size()); } /** * @return The number of devices actually used by all benchmarks. @@ -57,39 +54,27 @@ struct device_manager /** * @return The device_info object corresponding to `id`. */ - [[nodiscard]] const nvbench::device_info &get_device(int id) - { - return m_devices.at(id); - } + [[nodiscard]] const nvbench::device_info &get_device(int id); /** * @return A vector containing device_info objects for all detected CUDA * devices. */ - [[nodiscard]] const device_info_vector &get_devices() const - { - return m_devices; - } + [[nodiscard]] const device_info_vector &get_devices() const { return m_devices; } /** * @return A vector containing device_info objects for devices that are * actively used by all benchmarks. * @note This is only valid after nvbench::option_parser::parse executes. */ - [[nodiscard]] const device_info_vector &get_used_devices() const - { - return m_used_devices; - } + [[nodiscard]] const device_info_vector &get_used_devices() const { return m_used_devices; } private: device_manager(); friend struct option_parser; - void set_used_devices(device_info_vector devices) - { - m_used_devices = std::move(devices); - } + void set_used_devices(device_info_vector devices) { m_used_devices = std::move(devices); } device_info_vector m_devices; device_info_vector m_used_devices; diff --git a/nvbench/enum_type_list.cuh b/nvbench/enum_type_list.cuh index 6ec529bc..614057f1 100644 --- a/nvbench/enum_type_list.cuh +++ b/nvbench/enum_type_list.cuh @@ -64,10 +64,7 @@ struct type_strings> return std::to_string(Value); } - static std::string description() - { - return nvbench::demangle>(); - } + static std::string description() { return nvbench::demangle>(); } }; } // namespace nvbench @@ -86,15 +83,13 @@ struct type_strings> * \relatesalso enum_type_list * \relatesalso nvbench::enum_type_list */ -#define NVBENCH_DECLARE_ENUM_TYPE_STRINGS(T, \ - input_generator, \ - description_generator) \ - namespace nvbench \ - { \ - template \ - struct type_strings> \ - { \ - static std::string input_string() { return input_generator(Value); } \ - static std::string description() { return description_generator(Value); } \ - }; \ +#define NVBENCH_DECLARE_ENUM_TYPE_STRINGS(T, input_generator, description_generator) \ + namespace nvbench \ + { \ + template \ + struct type_strings> \ + { \ + static std::string input_string() { return input_generator(Value); } \ + static std::string description() { return description_generator(Value); } \ + }; \ } diff --git a/nvbench/exec_tag.cuh b/nvbench/exec_tag.cuh index b49ed36d..c935e4cb 100644 --- a/nvbench/exec_tag.cuh +++ b/nvbench/exec_tag.cuh @@ -31,16 +31,16 @@ enum class exec_flag none = 0x0, // Modifiers: - timer = 0x01, // KernelLauncher uses manual timing - no_block = 0x02, // Disables use of `blocking_kernel`. - sync = 0x04, // KernelLauncher has indicated that it will sync - run_once = 0x08, // Only run the benchmark once (for profiling). + timer = 0x01, // KernelLauncher uses manual timing + no_block = 0x02, // Disables use of `blocking_kernel`. + sync = 0x04, // KernelLauncher has indicated that it will sync + run_once = 0x08, // Only run the benchmark once (for profiling). modifier_mask = timer | no_block | sync | run_once, // Measurement types: - cold = 0x0100, // measure_hot - hot = 0x0200, // measure_cold - measure_mask = cold | hot + cold = 0x0100, // measure_cold + hot = 0x0200, // measure_hot + measure_mask = cold | hot }; } // namespace nvbench::detail @@ -120,7 +120,9 @@ constexpr inline auto timer = nvbench::exec_tag::impl::timer; /// Modifier used to indicate that the KernelGenerator will perform CUDA /// synchronizations. Without this flag such benchmarks will deadlock. -constexpr inline auto sync = nvbench::exec_tag::impl::no_block | - nvbench::exec_tag::impl::sync; +constexpr inline auto sync = nvbench::exec_tag::impl::no_block | nvbench::exec_tag::impl::sync; + +/// Modifier used to indicate that batched measurements should be disabled +constexpr inline auto no_batch = nvbench::exec_tag::impl::cold; } // namespace nvbench::exec_tag diff --git a/nvbench/flags.cuh b/nvbench/flags.cuh index 30ba84eb..cefefa3c 100644 --- a/nvbench/flags.cuh +++ b/nvbench/flags.cuh @@ -20,24 +20,24 @@ #include -#define NVBENCH_DECLARE_FLAGS(T) \ - constexpr inline T operator|(T v1, T v2) \ - { \ - using UT = std::underlying_type_t; \ - return static_cast(static_cast(v1) | static_cast(v2)); \ - } \ - constexpr inline T operator&(T v1, T v2) \ - { \ - using UT = std::underlying_type_t; \ - return static_cast(static_cast(v1) & static_cast(v2)); \ - } \ - constexpr inline T operator^(T v1, T v2) \ - { \ - using UT = std::underlying_type_t; \ - return static_cast(static_cast(v1) ^ static_cast(v2)); \ - } \ - constexpr inline T operator~(T v1) \ - { \ - using UT = std::underlying_type_t; \ - return static_cast(~static_cast(v1)); \ +#define NVBENCH_DECLARE_FLAGS(T) \ + constexpr inline T operator|(T v1, T v2) \ + { \ + using UT = std::underlying_type_t; \ + return static_cast(static_cast(v1) | static_cast(v2)); \ + } \ + constexpr inline T operator&(T v1, T v2) \ + { \ + using UT = std::underlying_type_t; \ + return static_cast(static_cast(v1) & static_cast(v2)); \ + } \ + constexpr inline T operator^(T v1, T v2) \ + { \ + using UT = std::underlying_type_t; \ + return static_cast(static_cast(v1) ^ static_cast(v2)); \ + } \ + constexpr inline T operator~(T v1) \ + { \ + using UT = std::underlying_type_t; \ + return static_cast(~static_cast(v1)); \ } diff --git a/nvbench/float64_axis.cuh b/nvbench/float64_axis.cuh index 0d606512..ef7b089d 100644 --- a/nvbench/float64_axis.cuh +++ b/nvbench/float64_axis.cuh @@ -36,20 +36,11 @@ struct float64_axis final : public axis_base ~float64_axis() final; - void set_inputs(std::vector inputs) - { - m_values = std::move(inputs); - } - [[nodiscard]] nvbench::float64_t get_value(std::size_t i) const - { - return m_values[i]; - } + void set_inputs(std::vector inputs) { m_values = std::move(inputs); } + [[nodiscard]] nvbench::float64_t get_value(std::size_t i) const { return m_values[i]; } private: - std::unique_ptr do_clone() const - { - return std::make_unique(*this); - } + std::unique_ptr do_clone() const final { return std::make_unique(*this); } std::size_t do_get_size() const final { return m_values.size(); } std::string do_get_input_string(std::size_t i) const final; std::string do_get_description(std::size_t i) const final; diff --git a/nvbench/git_revision.cuh b/nvbench/git_revision.cuh index 2b29e920..50fc9da7 100644 --- a/nvbench/git_revision.cuh +++ b/nvbench/git_revision.cuh @@ -1,20 +1,20 @@ /* -* Copyright 2021 NVIDIA Corporation -* -* Licensed under the Apache License, Version 2.0 with the LLVM exception -* (the "License"); you may not use this file except in compliance with -* the License. -* -* You may obtain a copy of the License at -* -* http://llvm.org/foundation/relicensing/LICENSE.txt -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Copyright 2021 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once @@ -25,7 +25,5 @@ #define NVBENCH_GIT_SHA1 NVBench_GIT_SHA1 #define NVBENCH_GIT_VERSION NVBench_GIT_VERSION #ifdef NVBench_GIT_IS_DIRTY -# define NVBENCH_GIT_IS_DIRTY +#define NVBENCH_GIT_IS_DIRTY #endif - - diff --git a/nvbench/int64_axis.cuh b/nvbench/int64_axis.cuh index a6cec2e3..adc95d11 100644 --- a/nvbench/int64_axis.cuh +++ b/nvbench/int64_axis.cuh @@ -58,34 +58,27 @@ struct int64_axis final : public axis_base return static_cast(m_flags & int64_axis_flags::power_of_two); } - void set_inputs(std::vector inputs, - int64_axis_flags flags = int64_axis_flags::none); + void set_inputs(std::vector inputs, int64_axis_flags flags = int64_axis_flags::none); - [[nodiscard]] const std::vector &get_inputs() const - { - return m_inputs; - }; + [[nodiscard]] const std::vector &get_inputs() const { return m_inputs; }; [[nodiscard]] int64_t get_value(std::size_t i) const { return m_values[i]; }; - [[nodiscard]] const std::vector &get_values() const - { - return m_values; - }; + [[nodiscard]] const std::vector &get_values() const { return m_values; }; int64_axis_flags get_flags() const { return m_flags; } // Helper functions for pow2 conversions: static nvbench::int64_t compute_pow2(nvbench::int64_t exponent) { - return 1ll << exponent; + return nvbench::int64_t{1} << exponent; } // UB if value < 0. static nvbench::int64_t compute_log2(nvbench::int64_t value) { // TODO use functions in C++20? - nvbench::uint64_t bits = static_cast(value); + nvbench::uint64_t bits = static_cast(value); nvbench::int64_t exponent = 0; while ((bits >>= 1) != 0ull) { @@ -95,10 +88,7 @@ struct int64_axis final : public axis_base }; private: - std::unique_ptr do_clone() const - { - return std::make_unique(*this); - } + std::unique_ptr do_clone() const final { return std::make_unique(*this); } std::size_t do_get_size() const final { return m_inputs.size(); } std::string do_get_input_string(std::size_t) const final; std::string do_get_description(std::size_t) const final; diff --git a/nvbench/int64_axis.cxx b/nvbench/int64_axis.cxx index 24ff913d..599c388f 100644 --- a/nvbench/int64_axis.cxx +++ b/nvbench/int64_axis.cxx @@ -66,9 +66,8 @@ std::string int64_axis::do_get_input_string(std::size_t i) const std::string int64_axis::do_get_description(std::size_t i) const { - return this->is_power_of_two() - ? fmt::format("2^{} = {}", m_inputs[i], m_values[i]) - : std::string{}; + return this->is_power_of_two() ? fmt::format("2^{} = {}", m_inputs[i], m_values[i]) + : std::string{}; } std::string_view int64_axis::do_get_flags_as_string() const diff --git a/nvbench/internal/markdown_table.cuh b/nvbench/internal/markdown_table.cuh index bb721c65..518f57b9 100644 --- a/nvbench/internal/markdown_table.cuh +++ b/nvbench/internal/markdown_table.cuh @@ -85,8 +85,7 @@ private: " {:^{}} ", col.header, col.max_width); - iter = - fmt::format_to(iter, m_color ? (m_bg | m_vdiv_fg) : m_no_style, "|"); + iter = fmt::format_to(iter, m_color ? (m_bg | m_vdiv_fg) : m_no_style, "|"); } return fmt::format_to(iter, "\n"); } @@ -102,8 +101,7 @@ private: "{:-^{}}", "", col.max_width + 2); - iter = - fmt::format_to(iter, m_color ? (m_bg | m_vdiv_fg) : m_no_style, "|"); + iter = fmt::format_to(iter, m_color ? (m_bg | m_vdiv_fg) : m_no_style, "|"); } return fmt::format_to(iter, "\n"); } @@ -116,8 +114,7 @@ private: for (std::size_t row = 0; row < m_num_rows; ++row) { - iter = - fmt::format_to(iter, m_color ? (m_bg | m_vdiv_fg) : m_no_style, "|"); + iter = fmt::format_to(iter, m_color ? (m_bg | m_vdiv_fg) : m_no_style, "|"); for (const column &col : m_columns) { iter = fmt::format_to(iter, @@ -125,8 +122,7 @@ private: " {:>{}} ", col.rows[row], col.max_width); - iter = - fmt::format_to(iter, m_color ? (m_bg | m_vdiv_fg) : m_no_style, "|"); + iter = fmt::format_to(iter, m_color ? (m_bg | m_vdiv_fg) : m_no_style, "|"); } // cols iter = fmt::format_to(iter, "\n"); diff --git a/nvbench/internal/nvml.cuh b/nvbench/internal/nvml.cuh index 497f31a1..05c6764a 100644 --- a/nvbench/internal/nvml.cuh +++ b/nvbench/internal/nvml.cuh @@ -32,6 +32,16 @@ namespace nvbench::nvml { +// RAII struct that initializes and shuts down NVML +// Needs to be constructed and kept alive while using nvml +struct NVMLLifetimeManager +{ + NVMLLifetimeManager(); + ~NVMLLifetimeManager(); +private: + bool m_inited{false}; +}; + /// Base class for NVML-specific exceptions struct error : std::runtime_error { @@ -74,10 +84,7 @@ struct call_failed : error [[nodiscard]] nvmlReturn_t get_error_code() const { return m_error_code; } - [[nodiscard]] const std::string &get_error_string() const - { - return m_error_string; - } + [[nodiscard]] const std::string &get_error_string() const { return m_error_string; } private: nvmlReturn_t m_error_code; @@ -90,30 +97,26 @@ private: #ifdef NVBENCH_HAS_NVML -#define NVBENCH_NVML_CALL(call) \ - do \ - { \ - const auto _rr = call; \ - if (_rr != NVML_SUCCESS) \ - { \ - throw nvbench::nvml::call_failed(__FILE__, \ - __LINE__, \ - #call, \ - _rr, \ - nvmlErrorString(_rr)); \ - } \ +#define NVBENCH_NVML_CALL(call) \ + do \ + { \ + const auto _rr = call; \ + if (_rr != NVML_SUCCESS) \ + { \ + throw nvbench::nvml::call_failed(__FILE__, __LINE__, #call, _rr, nvmlErrorString(_rr)); \ + } \ } while (false) // Same as above, but used for nvmlInit(), where a failure means that // nvmlErrorString is not available. -#define NVBENCH_NVML_CALL_NO_API(call) \ - do \ - { \ - const auto _rr = call; \ - if (_rr != NVML_SUCCESS) \ - { \ - throw nvbench::nvml::call_failed(__FILE__, __LINE__, #call, _rr, ""); \ - } \ +#define NVBENCH_NVML_CALL_NO_API(call) \ + do \ + { \ + const auto _rr = call; \ + if (_rr != NVML_SUCCESS) \ + { \ + throw nvbench::nvml::call_failed(__FILE__, __LINE__, #call, _rr, ""); \ + } \ } while (false) #endif // NVBENCH_HAS_NVML diff --git a/nvbench/internal/nvml.cxx b/nvbench/internal/nvml.cxx index 4f750bce..025515d2 100644 --- a/nvbench/internal/nvml.cxx +++ b/nvbench/internal/nvml.cxx @@ -18,54 +18,38 @@ #include -#include - -#include - -#include - -#include - -namespace +namespace nvbench::nvml { +NVMLLifetimeManager::NVMLLifetimeManager() +{ +#ifdef NVBENCH_HAS_NVML + try + { + NVBENCH_NVML_CALL_NO_API(nvmlInit()); + m_inited = true; + } + catch (std::exception &e) + { + fmt::print("NVML initialization failed:\n {}", e.what()); + } +#endif +} -// RAII struct that initializes and shuts down NVML -struct NVMLLifetimeManager +NVMLLifetimeManager::~NVMLLifetimeManager() { - NVMLLifetimeManager() +#ifdef NVBENCH_HAS_NVML + if (m_inited) { try { - NVBENCH_NVML_CALL_NO_API(nvmlInit()); - m_inited = true; + NVBENCH_NVML_CALL_NO_API(nvmlShutdown()); } catch (std::exception &e) { - fmt::print("NVML initialization failed:\n {}", e.what()); - } - } - - ~NVMLLifetimeManager() - { - if (m_inited) - { - try - { - NVBENCH_NVML_CALL_NO_API(nvmlShutdown()); - } - catch (std::exception &e) - { - fmt::print("NVML shutdown failed:\n {}", e.what()); - } + fmt::print("NVML shutdown failed:\n {}", e.what()); } } +#endif +} -private: - bool m_inited{false}; -}; - -// NVML's lifetime should extend for the entirety of the process, so store in a -// global. -auto nvml_lifetime = NVMLLifetimeManager{}; - -} // namespace +} // namespace nvbench::nvml diff --git a/nvbench/internal/table_builder.cuh b/nvbench/internal/table_builder.cuh index 81fca0a0..ae029b41 100644 --- a/nvbench/internal/table_builder.cuh +++ b/nvbench/internal/table_builder.cuh @@ -47,18 +47,14 @@ struct table_builder const std::string &header, std::string value) { - auto iter = std::find_if(m_columns.begin(), - m_columns.end(), - [&column_key](const column &col) { - return col.key == column_key; - }); + auto iter = std::find_if(m_columns.begin(), m_columns.end(), [&column_key](const column &col) { + return col.key == column_key; + }); auto &col = iter == m_columns.end() - ? m_columns.emplace_back(column{column_key, - header, - std::vector{}, - header.size()}) - : *iter; + ? m_columns.emplace_back( + column{column_key, header, std::vector{}, header.size()}) + : *iter; col.max_width = std::max(col.max_width, value.size()); if (col.rows.size() <= row) @@ -76,11 +72,9 @@ struct table_builder std::size_t{}, [](const auto &a, const auto &b) { return a > b ? a : b; }, [](const column &col) { return col.rows.size(); }); - std::for_each(m_columns.begin(), - m_columns.end(), - [num_rows = m_num_rows](column &col) { - col.rows.resize(num_rows); - }); + std::for_each(m_columns.begin(), m_columns.end(), [num_rows = m_num_rows](column &col) { + col.rows.resize(num_rows); + }); } }; diff --git a/nvbench/json_printer.cu b/nvbench/json_printer.cu index 7c99f2af..f7e337ae 100644 --- a/nvbench/json_printer.cu +++ b/nvbench/json_printer.cu @@ -43,10 +43,14 @@ #include #include -#ifdef __GNUC__ +#if __has_include() +#include +namespace fs = std::filesystem; +#elif __has_include() #include +namespace fs = std::experimental::filesystem; #else -#include +static_assert(false, "No or found."); #endif #if NVBENCH_CPP_DIALECT >= 2020 @@ -126,11 +130,10 @@ std::string json_printer::version_t::get_string() const return fmt::format("{}.{}.{}", this->major, this->minor, this->patch); } -void json_printer::do_process_bulk_data_float64( - state &state, - const std::string &tag, - const std::string &hint, - const std::vector &data) +void json_printer::do_process_bulk_data_float64(state &state, + const std::string &tag, + const std::string &hint, + const std::vector &data) { printer_base::do_process_bulk_data_float64(state, tag, hint, data); @@ -141,12 +144,6 @@ void json_printer::do_process_bulk_data_float64( if (hint == "sample_times") { -#ifdef __GNUC__ - namespace fs = std::experimental::filesystem; -#else - namespace fs = std::filesystem; -#endif - nvbench::cpu_timer timer; timer.start(); @@ -157,16 +154,12 @@ void json_printer::do_process_bulk_data_float64( { if (!fs::create_directory(result_path)) { - NVBENCH_THROW(std::runtime_error, - "{}", - "Failed to create result directory '{}'."); + NVBENCH_THROW(std::runtime_error, "{}", "Failed to create result directory '{}'."); } } else if (!fs::is_directory(result_path)) { - NVBENCH_THROW(std::runtime_error, - "{}", - "'{}' exists and is not a directory."); + NVBENCH_THROW(std::runtime_error, "{}", "'{}' exists and is not a directory."); } const auto file_id = m_num_jsonbin_files++; @@ -197,16 +190,12 @@ void json_printer::do_process_bulk_data_float64( } catch (std::exception &e) { - if (auto printer_opt_ref = state.get_benchmark().get_printer(); - printer_opt_ref.has_value()) + if (auto printer_opt_ref = state.get_benchmark().get_printer(); printer_opt_ref.has_value()) { auto &printer = printer_opt_ref.value().get(); - printer.log(nvbench::log_level::warn, - fmt::format("Error writing {} ({}) to {}: {}", - tag, - hint, - result_path.string(), - e.what())); + printer.log( + nvbench::log_level::warn, + fmt::format("Error writing {} ({}) to {}: {}", tag, hint, result_path.string(), e.what())); } } // end catch @@ -221,18 +210,45 @@ void json_printer::do_process_bulk_data_float64( summ.set_string("hide", "Not needed in table."); timer.stop(); - if (auto printer_opt_ref = state.get_benchmark().get_printer(); - printer_opt_ref.has_value()) + if (auto printer_opt_ref = state.get_benchmark().get_printer(); printer_opt_ref.has_value()) { auto &printer = printer_opt_ref.value().get(); - printer.log(nvbench::log_level::info, - fmt::format("Wrote '{}' in {:>6.3f}ms", - result_path.string(), - timer.get_duration() * 1000)); + printer.log( + nvbench::log_level::info, + fmt::format("Wrote '{}' in {:>6.3f}ms", result_path.string(), timer.get_duration() * 1000)); } } // end hint == sample_times } +static void add_devices_section(nlohmann::ordered_json &root) +{ + auto &devices = root["devices"]; + for (const auto &dev_info : nvbench::device_manager::get().get_devices()) + { + auto &device = devices.emplace_back(); + device["id"] = dev_info.get_id(); + device["name"] = dev_info.get_name(); + device["sm_version"] = dev_info.get_sm_version(); + device["ptx_version"] = dev_info.get_ptx_version(); + device["sm_default_clock_rate"] = dev_info.get_sm_default_clock_rate(); + device["number_of_sms"] = dev_info.get_number_of_sms(); + device["max_blocks_per_sm"] = dev_info.get_max_blocks_per_sm(); + device["max_threads_per_sm"] = dev_info.get_max_threads_per_sm(); + device["max_threads_per_block"] = dev_info.get_max_threads_per_block(); + device["registers_per_sm"] = dev_info.get_registers_per_sm(); + device["registers_per_block"] = dev_info.get_registers_per_block(); + device["global_memory_size"] = dev_info.get_global_memory_size(); + device["global_memory_bus_peak_clock_rate"] = + dev_info.get_global_memory_bus_peak_clock_rate(); + device["global_memory_bus_width"] = dev_info.get_global_memory_bus_width(); + device["global_memory_bus_bandwidth"] = dev_info.get_global_memory_bus_bandwidth(); + device["l2_cache_size"] = dev_info.get_l2_cache_size(); + device["shared_memory_per_sm"] = dev_info.get_shared_memory_per_sm(); + device["shared_memory_per_block"] = dev_info.get_shared_memory_per_block(); + device["ecc_state"] = dev_info.get_ecc_state(); + } +} + void json_printer::do_print_benchmark_results(const benchmark_vector &benches) { nlohmann::ordered_json root; @@ -285,36 +301,7 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches) } // "version" } // "meta" - { - auto &devices = root["devices"]; - for (const auto &dev_info : nvbench::device_manager::get().get_devices()) - { - auto &device = devices.emplace_back(); - device["id"] = dev_info.get_id(); - device["name"] = dev_info.get_name(); - device["sm_version"] = dev_info.get_sm_version(); - device["ptx_version"] = dev_info.get_ptx_version(); - device["sm_default_clock_rate"] = dev_info.get_sm_default_clock_rate(); - device["number_of_sms"] = dev_info.get_number_of_sms(); - device["max_blocks_per_sm"] = dev_info.get_max_blocks_per_sm(); - device["max_threads_per_sm"] = dev_info.get_max_threads_per_sm(); - device["max_threads_per_block"] = dev_info.get_max_threads_per_block(); - device["registers_per_sm"] = dev_info.get_registers_per_sm(); - device["registers_per_block"] = dev_info.get_registers_per_block(); - device["global_memory_size"] = dev_info.get_global_memory_size(); - device["global_memory_bus_peak_clock_rate"] = - dev_info.get_global_memory_bus_peak_clock_rate(); - device["global_memory_bus_width"] = - dev_info.get_global_memory_bus_width(); - device["global_memory_bus_bandwidth"] = - dev_info.get_global_memory_bus_bandwidth(); - device["l2_cache_size"] = dev_info.get_l2_cache_size(); - device["shared_memory_per_sm"] = dev_info.get_shared_memory_per_sm(); - device["shared_memory_per_block"] = - dev_info.get_shared_memory_per_block(); - device["ecc_state"] = dev_info.get_ecc_state(); - } - } // "devices" + add_devices_section(root); { auto &benchmarks = root["benchmarks"]; @@ -358,23 +345,19 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches) switch (axis_ptr->get_type()) { case nvbench::axis_type::type: - value["is_active"] = - static_cast(*axis_ptr).get_is_active(i); + value["is_active"] = static_cast(*axis_ptr).get_is_active(i); break; case nvbench::axis_type::int64: - value["value"] = - static_cast(*axis_ptr).get_value(i); + value["value"] = static_cast(*axis_ptr).get_value(i); break; case nvbench::axis_type::float64: - value["value"] = - static_cast(*axis_ptr).get_value(i); + value["value"] = static_cast(*axis_ptr).get_value(i); break; case nvbench::axis_type::string: - value["value"] = - static_cast(*axis_ptr).get_value(i); + value["value"] = static_cast(*axis_ptr).get_value(i); break; default: break; @@ -454,4 +437,73 @@ void json_printer::do_print_benchmark_results(const benchmark_vector &benches) m_ostream << root.dump(2) << "\n"; } +void json_printer::do_print_benchmark_list(const benchmark_vector &benches) +{ + if (benches.empty()) + { + return; + } + + nlohmann::ordered_json root; + auto &benchmarks = root["benchmarks"]; + + for (const auto &bench_ptr : benches) + { + const auto bench_index = benchmarks.size(); + auto &bench = benchmarks.emplace_back(); + + bench["name"] = bench_ptr->get_name(); + bench["index"] = bench_index; + + // We have to ensure that the axes are represented as an array, not an + // nil object when there are no axes. + auto &axes = bench["axes"] = nlohmann::json::array(); + + for (const auto &axis_ptr : bench_ptr->get_axes().get_axes()) + { + auto &axis = axes.emplace_back(); + + axis["name"] = axis_ptr->get_name(); + axis["type"] = axis_ptr->get_type_as_string(); + axis["flags"] = axis_ptr->get_flags_as_string(); + + auto &values = axis["values"]; + const auto axis_size = axis_ptr->get_size(); + for (std::size_t i = 0; i < axis_size; ++i) + { + auto &value = values.emplace_back(); + value["input_string"] = axis_ptr->get_input_string(i); + value["description"] = axis_ptr->get_description(i); + + switch (axis_ptr->get_type()) + { + case nvbench::axis_type::int64: + value["value"] = static_cast(*axis_ptr).get_value(i); + break; + + case nvbench::axis_type::float64: + value["value"] = static_cast(*axis_ptr).get_value(i); + break; + + case nvbench::axis_type::string: + value["value"] = static_cast(*axis_ptr).get_value(i); + break; + + default: + break; + } // end switch (axis type) + } // end foreach axis value + } + } // end foreach bench + + m_ostream << root.dump(2) << "\n"; +} + +void json_printer::print_devices_json() +{ + nlohmann::ordered_json root; + add_devices_section(root); + m_ostream << root.dump(2) << "\n"; +} + } // namespace nvbench diff --git a/nvbench/json_printer.cuh b/nvbench/json_printer.cuh index a58448f3..8457687f 100644 --- a/nvbench/json_printer.cuh +++ b/nvbench/json_printer.cuh @@ -38,9 +38,7 @@ struct json_printer : nvbench::printer_base { using printer_base::printer_base; - json_printer(std::ostream &stream, - std::string stream_name, - bool enable_binary_output) + json_printer(std::ostream &stream, std::string stream_name, bool enable_binary_output) : printer_base(stream, std::move(stream_name)) , m_enable_binary_output{enable_binary_output} {} @@ -59,24 +57,20 @@ struct json_printer : nvbench::printer_base [[nodiscard]] static version_t get_json_file_version(); - [[nodiscard]] bool get_enable_binary_output() const - { - return m_enable_binary_output; - } + [[nodiscard]] bool get_enable_binary_output() const { return m_enable_binary_output; } void set_enable_binary_output(bool b) { m_enable_binary_output = b; } + void print_devices_json(); + protected: // Virtual API from printer_base: - void do_log_argv(const std::vector& argv) override - { - m_argv = argv; - } - void do_process_bulk_data_float64( - nvbench::state &state, - const std::string &tag, - const std::string &hint, - const std::vector &data) override; + void do_log_argv(const std::vector &argv) override { m_argv = argv; } + void do_process_bulk_data_float64(nvbench::state &state, + const std::string &tag, + const std::string &hint, + const std::vector &data) override; void do_print_benchmark_results(const benchmark_vector &benches) override; + void do_print_benchmark_list(const benchmark_vector &) override; bool m_enable_binary_output{false}; std::size_t m_num_jsonbin_files{}; diff --git a/nvbench/launch.cuh b/nvbench/launch.cuh index 4b973f32..c45f1fda 100644 --- a/nvbench/launch.cuh +++ b/nvbench/launch.cuh @@ -42,20 +42,17 @@ struct launch {} // move-only - launch(const launch &) = delete; - launch(launch &&) = default; + launch(const launch &) = delete; + launch(launch &&) = default; launch &operator=(const launch &) = delete; - launch &operator=(launch &&) = default; + launch &operator=(launch &&) = delete; /** * @return a CUDA stream that all kernels and other stream-ordered CUDA work * must use. This stream can be changed by the `KernelGenerator` using the * `nvbench::state::set_cuda_stream` method. */ - __forceinline__ const nvbench::cuda_stream &get_stream() const - { - return m_stream; - }; + __forceinline__ const nvbench::cuda_stream &get_stream() const { return m_stream; }; private: // The stream is owned by the `nvbench::state` associated with this launch. diff --git a/nvbench/main.cuh b/nvbench/main.cuh index 4c1588cd..cd809ba4 100644 --- a/nvbench/main.cuh +++ b/nvbench/main.cuh @@ -25,64 +25,229 @@ #include #include +#include #include -#define NVBENCH_MAIN \ - int main(int argc, char const *const *argv) \ - try \ - { \ - NVBENCH_MAIN_BODY(argc, argv); \ - NVBENCH_CUDA_CALL(cudaDeviceReset()); \ - return 0; \ - } \ - catch (std::exception & e) \ - { \ - std::cerr << "\nNVBench encountered an error:\n\n" << e.what() << "\n"; \ - return 1; \ - } \ - catch (...) \ - { \ - std::cerr << "\nNVBench encountered an unknown error.\n"; \ - return 1; \ +// Advanced users can rebuild NVBench's `main` function using the macros in this file, or replace +// them with customized implementations. + +// Customization point, called before NVBench initialization. +#ifndef NVBENCH_MAIN_INITIALIZE_CUSTOM_PRE +#define NVBENCH_MAIN_INITIALIZE_CUSTOM_PRE(argc, argv) []() {}() +#endif + +// Customization point, called after NVBench initialization. +#ifndef NVBENCH_MAIN_INITIALIZE_CUSTOM_POST +#define NVBENCH_MAIN_INITIALIZE_CUSTOM_POST(argc, argv) []() {}() +#endif + +// Customization point, called before NVBench parsing. Update argc/argv if needed. +// argc/argv are the usual command line arguments types. The ARGS version of this +// macro is a bit more convenient. +#ifndef NVBENCH_MAIN_CUSTOM_ARGC_ARGV_HANDLER +#define NVBENCH_MAIN_CUSTOM_ARGC_ARGV_HANDLER(argc, argv) []() {}() +#endif + +// Customization point, called before NVBench parsing. Update args if needed. +// Args is a vector of strings, each element is an argument. +#ifndef NVBENCH_MAIN_CUSTOM_ARGS_HANDLER +#define NVBENCH_MAIN_CUSTOM_ARGS_HANDLER(args) []() {}() +#endif + +// Customization point, called before NVBench parsing. +#ifndef NVBENCH_MAIN_PARSE_CUSTOM_PRE +#define NVBENCH_MAIN_PARSE_CUSTOM_PRE(parser, args) []() {}() +#endif + +// Customization point, called after NVBench parsing. +#ifndef NVBENCH_MAIN_PARSE_CUSTOM_POST +#define NVBENCH_MAIN_PARSE_CUSTOM_POST(parser) []() {}() +#endif + +// Customization point, called before NVBench finalization. +#ifndef NVBENCH_MAIN_FINALIZE_CUSTOM_PRE +#define NVBENCH_MAIN_FINALIZE_CUSTOM_PRE() []() {}() +#endif + +// Customization point, called after NVBench finalization. +#ifndef NVBENCH_MAIN_FINALIZE_CUSTOM_POST +#define NVBENCH_MAIN_FINALIZE_CUSTOM_POST() []() {}() +#endif + +// Customization point, use to catch addition exceptions. +#ifndef NVBENCH_MAIN_CATCH_EXCEPTIONS_CUSTOM +#define NVBENCH_MAIN_CATCH_EXCEPTIONS_CUSTOM +#endif + +/************************************ Default implementation **************************************/ + +#ifndef NVBENCH_MAIN +#define NVBENCH_MAIN \ + int main(int argc, char **argv) \ + try \ + { \ + NVBENCH_MAIN_BODY(argc, argv); \ + return 0; \ + } \ + NVBENCH_MAIN_CATCH_EXCEPTIONS_CUSTOM \ + NVBENCH_MAIN_CATCH_EXCEPTIONS +#endif + +#ifndef NVBENCH_MAIN_BODY +#define NVBENCH_MAIN_BODY(argc, argv) \ + NVBENCH_MAIN_INITIALIZE(argc, argv); \ + { \ + NVBENCH_MAIN_PARSE(argc, argv); \ + \ + NVBENCH_MAIN_PRINT_PREAMBLE(parser); \ + NVBENCH_MAIN_RUN_BENCHMARKS(parser); \ + NVBENCH_MAIN_PRINT_EPILOGUE(parser); \ + \ + NVBENCH_MAIN_PRINT_RESULTS(parser); \ + } /* Tear down parser before finalization */ \ + NVBENCH_MAIN_FINALIZE(); \ + return 0; +#endif + +#ifndef NVBENCH_MAIN_INITIALIZE +#define NVBENCH_MAIN_INITIALIZE(argc, argv) \ + { /* Open a scope to ensure that the inner initialize/finalize hooks clean up in order. */ \ + NVBENCH_MAIN_INITIALIZE_CUSTOM_PRE(argc, argv); \ + nvbench::detail::main_initialize(argc, argv); \ + { /* Open a scope to ensure that the inner initialize/finalize hooks clean up in order. */ \ + NVBENCH_MAIN_INITIALIZE_CUSTOM_POST(argc, argv) +#endif + +#ifndef NVBENCH_MAIN_PARSE +#define NVBENCH_MAIN_PARSE(argc, argv) \ + NVBENCH_MAIN_CUSTOM_ARGC_ARGV_HANDLER(argc, argv); \ + std::vector args = nvbench::detail::main_convert_args(argc, argv); \ + NVBENCH_MAIN_CUSTOM_ARGS_HANDLER(args); \ + nvbench::option_parser parser; \ + NVBENCH_MAIN_PARSE_CUSTOM_PRE(parser, args); \ + parser.parse(args); \ + NVBENCH_MAIN_PARSE_CUSTOM_POST(parser) +#endif + +#ifndef NVBENCH_MAIN_PRINT_PREAMBLE +#define NVBENCH_MAIN_PRINT_PREAMBLE(parser) nvbench::detail::main_print_preamble(parser) +#endif + +#ifndef NVBENCH_MAIN_RUN_BENCHMARKS +#define NVBENCH_MAIN_RUN_BENCHMARKS(parser) nvbench::detail::main_run_benchmarks(parser) +#endif + +#ifndef NVBENCH_MAIN_PRINT_EPILOGUE +#define NVBENCH_MAIN_PRINT_EPILOGUE(parser) nvbench::detail::main_print_epilogue(parser) +#endif + +#ifndef NVBENCH_MAIN_PRINT_RESULTS +#define NVBENCH_MAIN_PRINT_RESULTS(parser) nvbench::detail::main_print_results(parser) +#endif + +#ifndef NVBENCH_MAIN_FINALIZE +#define NVBENCH_MAIN_FINALIZE() \ + NVBENCH_MAIN_FINALIZE_CUSTOM_PRE(); \ + } /* Close a scope to ensure that the inner initialize/finalize hooks clean up in order. */ \ + nvbench::detail::main_finalize(); \ + NVBENCH_MAIN_FINALIZE_CUSTOM_POST(); \ + } /* Close a scope to ensure that the inner initialize/finalize hooks clean up in order. */ \ + []() {}() +#endif + +#ifndef NVBENCH_MAIN_CATCH_EXCEPTIONS +#define NVBENCH_MAIN_CATCH_EXCEPTIONS \ + catch (std::exception & e) \ + { \ + std::cerr << "\nNVBench encountered an error:\n\n" << e.what() << "\n"; \ + return 1; \ + } \ + catch (...) \ + { \ + std::cerr << "\nNVBench encountered an unknown error.\n"; \ + return 1; \ } +#endif -#ifdef NVBENCH_HAS_CUPTI -#define NVBENCH_INITIALIZE_DRIVER_API NVBENCH_DRIVER_API_CALL(cuInit(0)) +namespace nvbench::detail +{ + +inline void set_env(const char *name, const char *value) +{ +#ifdef _MSC_VER + _putenv_s(name, value); #else -// clang-format off -#define NVBENCH_INITIALIZE_DRIVER_API do {} while (false) -// clang-format on -#endif - -#define NVBENCH_MAIN_PARSE(argc, argv) \ - nvbench::option_parser parser; \ - parser.parse(argc, argv) - -#define NVBENCH_MAIN_BODY(argc, argv) \ - do \ - { \ - NVBENCH_INITIALIZE_DRIVER_API; \ - NVBENCH_MAIN_PARSE(argc, argv); \ - auto &printer = parser.get_printer(); \ - \ - printer.print_device_info(); \ - printer.print_log_preamble(); \ - auto &benchmarks = parser.get_benchmarks(); \ - \ - std::size_t total_states = 0; \ - for (auto &bench_ptr : benchmarks) \ - { \ - total_states += bench_ptr->get_config_count(); \ - } \ - printer.set_total_state_count(total_states); \ - \ - printer.set_completed_state_count(0); \ - for (auto &bench_ptr : benchmarks) \ - { \ - bench_ptr->set_printer(printer); \ - bench_ptr->run(); \ - bench_ptr->clear_printer(); \ - } \ - printer.print_log_epilogue(); \ - printer.print_benchmark_results(benchmarks); \ - } while (false) + setenv(name, value, 1); +#endif +} + +inline void main_initialize(int, char **) +{ + // See NVIDIA/NVBench#136 for CUDA_MODULE_LOADING + set_env("CUDA_MODULE_LOADING", "EAGER"); + + // Initialize CUDA driver API if needed: +#ifdef NVBENCH_HAS_CUPTI + NVBENCH_DRIVER_API_CALL(cuInit(0)); +#endif + + // Initialize the benchmarks *after* setting up the CUDA environment: + nvbench::benchmark_manager::get().initialize(); +} + +inline std::vector main_convert_args(int argc, char const *const *argv) +{ + std::vector args; + for (int i = 0; i < argc; ++i) + { + args.push_back(argv[i]); + } + return args; +} + +inline void main_print_preamble(option_parser &parser) +{ + auto &printer = parser.get_printer(); + + printer.print_device_info(); + printer.print_log_preamble(); +} + +inline void main_run_benchmarks(option_parser &parser) +{ + auto &printer = parser.get_printer(); + auto &benchmarks = parser.get_benchmarks(); + + std::size_t total_states = 0; + for (auto &bench_ptr : benchmarks) + { + total_states += bench_ptr->get_config_count(); + } + + printer.set_completed_state_count(0); + printer.set_total_state_count(total_states); + + for (auto &bench_ptr : benchmarks) + { + bench_ptr->set_printer(printer); + bench_ptr->run(); + bench_ptr->clear_printer(); + } +} + +inline void main_print_epilogue(option_parser &parser) +{ + auto &printer = parser.get_printer(); + printer.print_log_epilogue(); +} + +inline void main_print_results(option_parser &parser) +{ + auto &printer = parser.get_printer(); + auto &benchmarks = parser.get_benchmarks(); + printer.print_benchmark_results(benchmarks); +} + +inline void main_finalize() { NVBENCH_CUDA_CALL(cudaDeviceReset()); } + +} // namespace nvbench::detail diff --git a/nvbench/markdown_printer.cu b/nvbench/markdown_printer.cu index 276ca865..6b892a85 100644 --- a/nvbench/markdown_printer.cu +++ b/nvbench/markdown_printer.cu @@ -41,65 +41,54 @@ namespace nvbench void markdown_printer::do_print_device_info() { fmt::memory_buffer buffer; - fmt::format_to(buffer, "# Devices\n\n"); + fmt::format_to(std::back_inserter(buffer), "# Devices\n\n"); const auto &device_mgr = nvbench::device_manager::get(); - const auto &devices = device_mgr.get_number_of_used_devices() > 0 - ? device_mgr.get_used_devices() - : device_mgr.get_devices(); + const auto &devices = device_mgr.get_number_of_used_devices() > 0 ? device_mgr.get_used_devices() + : device_mgr.get_devices(); for (const auto &device : devices) { const auto [gmem_free, gmem_used] = device.get_global_memory_usage(); - fmt::format_to(buffer, "## [{}] `{}`\n", device.get_id(), device.get_name()); - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), "## [{}] `{}`\n", device.get_id(), device.get_name()); + fmt::format_to(std::back_inserter(buffer), "* SM Version: {} (PTX Version: {})\n", device.get_sm_version(), device.get_ptx_version()); - fmt::format_to(buffer, "* Number of SMs: {}\n", device.get_number_of_sms()); - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), "* Number of SMs: {}\n", device.get_number_of_sms()); + fmt::format_to(std::back_inserter(buffer), "* SM Default Clock Rate: {} MHz\n", device.get_sm_default_clock_rate() / 1000 / 1000); - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), "* Global Memory: {} MiB Free / {} MiB Total\n", gmem_free / 1024 / 1024, gmem_used / 1024 / 1024); - fmt::format_to( - buffer, - "* Global Memory Bus Peak: {} GB/sec ({}-bit DDR @{}MHz)\n", - device.get_global_memory_bus_bandwidth() / 1000 / 1000 / 1000, - device.get_global_memory_bus_width(), - device.get_global_memory_bus_peak_clock_rate() / 1000 / 1000); - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), + "* Global Memory Bus Peak: {} GB/sec ({}-bit DDR @{}MHz)\n", + device.get_global_memory_bus_bandwidth() / 1000 / 1000 / 1000, + device.get_global_memory_bus_width(), + device.get_global_memory_bus_peak_clock_rate() / 1000 / 1000); + fmt::format_to(std::back_inserter(buffer), "* Max Shared Memory: {} KiB/SM, {} KiB/Block\n", device.get_shared_memory_per_sm() / 1024, device.get_shared_memory_per_block() / 1024); - fmt::format_to(buffer, - "* L2 Cache Size: {} KiB\n", - device.get_l2_cache_size() / 1024); - fmt::format_to(buffer, - "* Maximum Active Blocks: {}/SM\n", - device.get_max_blocks_per_sm()); - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), "* L2 Cache Size: {} KiB\n", device.get_l2_cache_size() / 1024); + fmt::format_to(std::back_inserter(buffer), "* Maximum Active Blocks: {}/SM\n", device.get_max_blocks_per_sm()); + fmt::format_to(std::back_inserter(buffer), "* Maximum Active Threads: {}/SM, {}/Block\n", device.get_max_threads_per_sm(), device.get_max_threads_per_block()); - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), "* Available Registers: {}/SM, {}/Block\n", device.get_registers_per_sm(), device.get_registers_per_block()); - fmt::format_to(buffer, - "* ECC Enabled: {}\n", - device.get_ecc_state() ? "Yes" : "No"); - fmt::format_to(buffer, "\n"); + fmt::format_to(std::back_inserter(buffer), "* ECC Enabled: {}\n", device.get_ecc_state() ? "Yes" : "No"); + fmt::format_to(std::back_inserter(buffer), "\n"); } m_ostream << fmt::to_string(buffer); } -void markdown_printer::do_print_log_preamble() -{ - m_ostream << "# Log\n\n```\n"; -} +void markdown_printer::do_print_log_preamble() { m_ostream << "# Log\n\n```\n"; } void markdown_printer::do_print_log_epilogue() { m_ostream << "```\n\n"; } @@ -147,8 +136,7 @@ void markdown_printer::do_log_run_state(const nvbench::state &exec_state) { if (m_total_state_count == 0) { // No progress info - this->log(nvbench::log_level::run, - exec_state.get_short_description(m_color)); + this->log(nvbench::log_level::run, exec_state.get_short_description(m_color)); } else { // Add progress @@ -160,8 +148,7 @@ void markdown_printer::do_log_run_state(const nvbench::state &exec_state) } } -void markdown_printer::do_print_benchmark_list( - const printer_base::benchmark_vector &benches) +void markdown_printer::do_print_benchmark_list(const printer_base::benchmark_vector &benches) { if (benches.empty()) { @@ -169,20 +156,20 @@ void markdown_printer::do_print_benchmark_list( } fmt::memory_buffer buffer; - fmt::format_to(buffer, "# Benchmarks\n\n"); + fmt::format_to(std::back_inserter(buffer), "# Benchmarks\n\n"); std::size_t benchmark_id{0}; for (const auto &bench_ptr : benches) { const auto &axes = bench_ptr->get_axes().get_axes(); const std::size_t num_configs = bench_ptr->get_config_count(); - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), "## [{}] `{}` ({} configurations)\n\n", benchmark_id++, bench_ptr->get_name(), num_configs); - fmt::format_to(buffer, "### Axes\n\n"); + fmt::format_to(std::back_inserter(buffer), "### Axes\n\n"); for (const auto &axis_ptr : axes) { std::string flags_str(axis_ptr->get_flags_as_string()); @@ -190,7 +177,7 @@ void markdown_printer::do_print_benchmark_list( { flags_str = fmt::format(" [{}]", flags_str); } - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), "* `{}` : {}{}\n", axis_ptr->get_name(), axis_ptr->get_type_as_string(), @@ -204,20 +191,16 @@ void markdown_printer::do_print_benchmark_list( { desc = fmt::format(" ({})", desc); } - fmt::format_to(buffer, - " * `{}`{}\n", - axis_ptr->get_input_string(i), - desc); + fmt::format_to(std::back_inserter(buffer), " * `{}`{}\n", axis_ptr->get_input_string(i), desc); } // end foreach value } // end foreach axis - fmt::format_to(buffer, "\n"); + fmt::format_to(std::back_inserter(buffer), "\n"); } // end foreach bench m_ostream << fmt::to_string(buffer); } -void markdown_printer::do_print_benchmark_results( - const printer_base::benchmark_vector &benches) +void markdown_printer::do_print_benchmark_results(const printer_base::benchmark_vector &benches) { auto format_visitor = [](const auto &v) { using T = std::decay_t; @@ -239,7 +222,7 @@ void markdown_printer::do_print_benchmark_results( // Start printing benchmarks fmt::memory_buffer buffer; - fmt::format_to(buffer, "# Benchmark Results\n"); + fmt::format_to(std::back_inserter(buffer), "# Benchmark Results\n"); for (const auto &bench_ptr : benches) { @@ -247,24 +230,20 @@ void markdown_printer::do_print_benchmark_results( const auto &devices = bench.get_devices(); const auto &axes = bench.get_axes(); - fmt::format_to(buffer, "\n## {}\n", bench.get_name()); + fmt::format_to(std::back_inserter(buffer), "\n## {}\n", bench.get_name()); // Do a single pass when no devices are specified. This happens for // benchmarks with `cpu` exec_tags. const std::size_t num_device_passes = devices.empty() ? 1 : devices.size(); - for (std::size_t device_pass = 0; device_pass < num_device_passes; - ++device_pass) + for (std::size_t device_pass = 0; device_pass < num_device_passes; ++device_pass) { - std::optional device = - devices.empty() ? std::nullopt - : std::make_optional(devices[device_pass]); + std::optional device = devices.empty() + ? std::nullopt + : std::make_optional(devices[device_pass]); if (device) { - fmt::format_to(buffer, - "\n### [{}] {}\n\n", - device->get_id(), - device->get_name()); + fmt::format_to(std::back_inserter(buffer), "\n### [{}] {}\n\n", device->get_id(), device->get_name()); } std::size_t row = 0; @@ -288,15 +267,11 @@ void markdown_printer::do_print_benchmark_results( { const nvbench::int64_t value = axis_values.get_int64(name); const nvbench::int64_t exponent = int64_axis::compute_log2(value); - table.add_cell(row, - name, - name, - fmt::format("2^{} = {}", exponent, value)); + table.add_cell(row, name, name, fmt::format("2^{} = {}", exponent, value)); } else { - std::string value = std::visit(format_visitor, - axis_values.get_value(name)); + std::string value = std::visit(format_visitor, axis_values.get_value(name)); table.add_cell(row, name + "_axis", name, std::move(value)); } } @@ -308,12 +283,9 @@ void markdown_printer::do_print_benchmark_results( continue; } const std::string &tag = summ.get_tag(); - const std::string &header = summ.has_value("name") - ? summ.get_string("name") - : tag; + const std::string &header = summ.has_value("name") ? summ.get_string("name") : tag; - std::string hint = summ.has_value("hint") ? summ.get_string("hint") - : std::string{}; + std::string hint = summ.has_value("hint") ? summ.get_string("hint") : std::string{}; if (hint == "duration") { table.add_cell(row, tag, header, this->do_format_duration(summ)); @@ -332,10 +304,7 @@ void markdown_printer::do_print_benchmark_results( } else if (hint == "sample_size") { - table.add_cell(row, - tag, - header, - this->do_format_sample_size(summ)); + table.add_cell(row, tag, header, this->do_format_sample_size(summ)); } else if (hint == "percentage") { @@ -351,10 +320,9 @@ void markdown_printer::do_print_benchmark_results( } auto table_str = table.to_string(); - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), "{}", - table_str.empty() ? "No data -- check log.\n" - : std::move(table_str)); + table_str.empty() ? "No data -- check log.\n" : std::move(table_str)); } // end foreach device_pass } diff --git a/nvbench/named_values.cuh b/nvbench/named_values.cuh index c11dab4d..1ce51858 100644 --- a/nvbench/named_values.cuh +++ b/nvbench/named_values.cuh @@ -33,8 +33,7 @@ namespace nvbench */ struct named_values { - using value_type = - std::variant; + using value_type = std::variant; enum class type { @@ -43,7 +42,7 @@ struct named_values string }; - void append(const named_values& other); + void append(const named_values &other); [[nodiscard]] std::size_t get_size() const; [[nodiscard]] std::vector get_names() const; @@ -60,11 +59,11 @@ struct named_values [[nodiscard]] type get_type(const std::string &name) const; [[nodiscard]] bool has_value(const std::string &name) const; - [[nodiscard]] const value_type& get_value(const std::string &name) const; + [[nodiscard]] const value_type &get_value(const std::string &name) const; void clear(); - void remove_value(const std::string& name); + void remove_value(const std::string &name); private: struct named_value diff --git a/nvbench/named_values.cxx b/nvbench/named_values.cxx index 1aeb4dc4..605789ed 100644 --- a/nvbench/named_values.cxx +++ b/nvbench/named_values.cxx @@ -33,9 +33,7 @@ namespace nvbench void named_values::append(const named_values &other) { - m_storage.insert(m_storage.end(), - other.m_storage.cbegin(), - other.m_storage.cend()); + m_storage.insert(m_storage.end(), other.m_storage.cbegin(), other.m_storage.cend()); } void named_values::clear() { m_storage.clear(); } @@ -55,20 +53,17 @@ std::vector named_values::get_names() const bool named_values::has_value(const std::string &name) const { - auto iter = - std::find_if(m_storage.cbegin(), - m_storage.cend(), - [&name](const auto &val) { return val.name == name; }); + auto iter = std::find_if(m_storage.cbegin(), m_storage.cend(), [&name](const auto &val) { + return val.name == name; + }); return iter != m_storage.cend(); } -const named_values::value_type & -named_values::get_value(const std::string &name) const +const named_values::value_type &named_values::get_value(const std::string &name) const { - auto iter = - std::find_if(m_storage.cbegin(), - m_storage.cend(), - [&name](const auto &val) { return val.name == name; }); + auto iter = std::find_if(m_storage.cbegin(), m_storage.cend(), [&name](const auto &val) { + return val.name == name; + }); if (iter == m_storage.cend()) { NVBENCH_THROW(std::runtime_error, "No value with name '{}'.", name); @@ -96,9 +91,7 @@ named_values::type named_values::get_type(const std::string &name) const // warning C4702: unreachable code // This is a future-proofing check, it'll be reachable if something breaks NVBENCH_MSVC_PUSH_DISABLE_WARNING(4702) - NVBENCH_THROW(std::runtime_error, - "Unknown variant type for entry '{}'.", - name); + NVBENCH_THROW(std::runtime_error, "Unknown variant type for entry '{}'.", name); }, this->get_value(name)); NVBENCH_MSVC_POP_WARNING() @@ -111,10 +104,7 @@ try } catch (std::exception &err) { - NVBENCH_THROW(std::runtime_error, - "Error looking up int64 value `{}`:\n{}", - name, - err.what()); + NVBENCH_THROW(std::runtime_error, "Error looking up int64 value `{}`:\n{}", name, err.what()); } nvbench::float64_t named_values::get_float64(const std::string &name) const @@ -124,10 +114,7 @@ try } catch (std::exception &err) { - NVBENCH_THROW(std::runtime_error, - "Error looking up float64 value `{}`:\n{}", - name, - err.what()); + NVBENCH_THROW(std::runtime_error, "Error looking up float64 value `{}`:\n{}", name, err.what()); } const std::string &named_values::get_string(const std::string &name) const @@ -137,10 +124,7 @@ try } catch (std::exception &err) { - NVBENCH_THROW(std::runtime_error, - "Error looking up string value `{}`:\n{}", - name, - err.what()); + NVBENCH_THROW(std::runtime_error, "Error looking up string value `{}`:\n{}", name, err.what()); } void named_values::set_int64(std::string name, nvbench::int64_t value) @@ -165,10 +149,9 @@ void named_values::set_value(std::string name, named_values::value_type value) void named_values::remove_value(const std::string &name) { - auto iter = - std::find_if(m_storage.begin(), m_storage.end(), [&name](const auto &val) { - return val.name == name; - }); + auto iter = std::find_if(m_storage.begin(), m_storage.end(), [&name](const auto &val) { + return val.name == name; + }); if (iter != m_storage.end()) { m_storage.erase(iter); diff --git a/nvbench/nvbench.cuh b/nvbench/nvbench.cuh index 75bf1c1e..3fb933fb 100644 --- a/nvbench/nvbench.cuh +++ b/nvbench/nvbench.cuh @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/nvbench/option_parser.cu b/nvbench/option_parser.cu index 55f7f1c7..1edac87f 100644 --- a/nvbench/option_parser.cu +++ b/nvbench/option_parser.cu @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include #include @@ -82,20 +84,11 @@ std::string_view submatch_to_sv(const sv_submatch &in) // // So we're stuck with materializing a std::string and calling std::stoX(). Ah // well. At least it's not istream. -void parse(std::string_view input, nvbench::int32_t &val) -{ - val = std::stoi(std::string(input)); -} +void parse(std::string_view input, nvbench::int32_t &val) { val = std::stoi(std::string(input)); } -void parse(std::string_view input, nvbench::int64_t &val) -{ - val = std::stoll(std::string(input)); -} +void parse(std::string_view input, nvbench::int64_t &val) { val = std::stoll(std::string(input)); } -void parse(std::string_view input, nvbench::float64_t &val) -{ - val = std::stod(std::string(input)); -} +void parse(std::string_view input, nvbench::float64_t &val) { val = std::stod(std::string(input)); } void parse(std::string_view input, std::string &val) { val = input; } @@ -112,9 +105,8 @@ std::vector parse_list_values(std::string_view list_spec) "(?:,|$)" // Delimiters }; - auto values_begin = - sv_regex_iterator(list_spec.cbegin(), list_spec.cend(), value_regex); - auto values_end = sv_regex_iterator{}; + auto values_begin = sv_regex_iterator(list_spec.cbegin(), list_spec.cend(), value_regex); + auto values_end = sv_regex_iterator{}; while (values_begin != values_end) { auto match = *values_begin++; @@ -131,8 +123,7 @@ std::vector parse_list_values(std::string_view list_spec) // Parses a range specification " : [ : ]" and returns // a vector filled with the specified range. template -std::vector parse_range_values(std::string_view range_spec, - nvbench::wrapped_type) +std::vector parse_range_values(std::string_view range_spec, nvbench::wrapped_type) { std::vector range_params; @@ -143,9 +134,8 @@ std::vector parse_range_values(std::string_view range_spec, "(?:$|:)" // Delimiters }; - auto values_begin = - sv_regex_iterator(range_spec.cbegin(), range_spec.cend(), value_regex); - auto values_end = sv_regex_iterator{}; + auto values_begin = sv_regex_iterator(range_spec.cbegin(), range_spec.cend(), value_regex); + auto values_end = sv_regex_iterator{}; for (; values_begin != values_end; ++values_begin) { auto match = *values_begin; @@ -221,25 +211,15 @@ std::vector parse_values(std::string_view value_spec) "$"}; // EOS sv_match match; - if (std::regex_search(value_spec.cbegin(), - value_spec.cend(), - match, - list_regex)) + if (std::regex_search(value_spec.cbegin(), value_spec.cend(), match, list_regex)) { return parse_list_values(submatch_to_sv(match[1])); } - else if (std::regex_search(value_spec.cbegin(), - value_spec.cend(), - match, - range_regex)) + else if (std::regex_search(value_spec.cbegin(), value_spec.cend(), match, range_regex)) { - return parse_range_values(submatch_to_sv(match[1]), - nvbench::wrapped_type{}); + return parse_range_values(submatch_to_sv(match[1]), nvbench::wrapped_type{}); } - else if (std::regex_search(value_spec.cbegin(), - value_spec.cend(), - match, - single_regex)) + else if (std::regex_search(value_spec.cbegin(), value_spec.cend(), match, single_regex)) { T val; parse(submatch_to_sv(match[1]), val); @@ -247,9 +227,7 @@ std::vector parse_values(std::string_view value_spec) } else { - NVBENCH_THROW(std::runtime_error, - "Invalid axis value spec: {}", - value_spec); + NVBENCH_THROW(std::runtime_error, "Invalid axis value spec: {}", value_spec); } } @@ -389,7 +367,7 @@ void option_parser::parse_range(option_parser::arg_iterator_t first, } auto check_params = [&first, &last](std::size_t num_params) { - const std::size_t rem_args = std::distance(first, last) - 1; + const std::size_t rem_args = static_cast(std::distance(first, last) - 1); if (rem_args < num_params) { NVBENCH_THROW(std::runtime_error, @@ -400,6 +378,9 @@ void option_parser::parse_range(option_parser::arg_iterator_t first, } }; + const nvbench::criterion_manager::params_description criterion_params = + nvbench::criterion_manager::get().get_params_description(); + while (first < last) { const auto &arg = *first; @@ -423,7 +404,21 @@ void option_parser::parse_range(option_parser::arg_iterator_t first, } else if (arg == "--list" || arg == "-l") { - this->print_list(); + nvbench::markdown_printer printer{std::cout}; + this->print_list(printer); + std::exit(0); + } + else if (arg == "--jsonlist-benches") + { + nvbench::json_printer printer{std::cout}; + const auto &bench_mgr = nvbench::benchmark_manager::get(); + printer.print_benchmark_list(bench_mgr.get_benchmarks()); + std::exit(0); + } + else if (arg == "--jsonlist-devices") + { + nvbench::json_printer printer{std::cout}; + printer.print_devices_json(); std::exit(0); } else if (arg == "--persistence-mode" || arg == "--pm") @@ -443,6 +438,12 @@ void option_parser::parse_range(option_parser::arg_iterator_t first, this->enable_run_once(); first += 1; } + else if (arg == "--stopping-criterion") + { + check_params(1); + this->set_stopping_criterion(first[1]); + first += 2; + } else if (arg == "--disable-blocking-kernel") { this->disable_blocking_kernel(); @@ -454,7 +455,7 @@ void option_parser::parse_range(option_parser::arg_iterator_t first, this->disable_blocking_kernel(); first += 1; } - else if (arg == "--quiet" | arg == "-q") + else if (arg == "--quiet" || arg == "-q") { // Setting this flag prevents the default stdout printer from being // added. @@ -514,18 +515,34 @@ void option_parser::parse_range(option_parser::arg_iterator_t first, this->update_int64_prop(first[0], first[1]); first += 2; } - else if (arg == "--min-time" || arg == "--max-noise" || - arg == "--skip-time" || arg == "--timeout") + else if (arg == "--skip-time" || arg == "--timeout") { check_params(1); this->update_float64_prop(first[0], first[1]); first += 2; } else - { - NVBENCH_THROW(std::runtime_error, - "Unrecognized command-line argument: `{}`.", - arg); + { // Try criterion params + if (arg.size() < 3 || arg[0] != '-' || arg[1] != '-') + { + NVBENCH_THROW(std::runtime_error, "Unrecognized command-line argument: `{}`.", arg); + } + + std::string_view name(arg.c_str() + 2, arg.size() - 2); + auto it = std::find_if(criterion_params.begin(), + criterion_params.end(), + [&name](const auto ¶m) { return param.first == name; }); + + if (it != criterion_params.end()) + { + check_params(1); + this->update_criterion_prop(first[0], first[1], it->second); + first += 2; + } + else + { + NVBENCH_THROW(std::runtime_error, "Unrecognized command-line argument: `{}`.", arg); + } } } } @@ -534,7 +551,7 @@ void option_parser::add_markdown_printer(const std::string &spec) try { std::ostream &stream = this->printer_spec_to_ostream(spec); - auto &printer = m_printer.emplace(stream, spec); + auto &printer = m_printer.emplace(stream, spec); if (spec == "stdout") { printer.set_color(m_color_md_stdout_printer); @@ -556,14 +573,10 @@ try } catch (std::exception &e) { - NVBENCH_THROW(std::runtime_error, - "Error while adding csv output for `{}`:\n{}", - spec, - e.what()); + NVBENCH_THROW(std::runtime_error, "Error while adding csv output for `{}`:\n{}", spec, e.what()); } -void option_parser::add_json_printer(const std::string &spec, - bool enable_binary) +void option_parser::add_json_printer(const std::string &spec, bool enable_binary) try { std::ostream &stream = this->printer_spec_to_ostream(spec); @@ -610,11 +623,9 @@ void option_parser::print_version() const NVBENCH_GIT_VERSION); } -void option_parser::print_list() const +void option_parser::print_list(printer_base& printer) const { const auto &bench_mgr = nvbench::benchmark_manager::get(); - - nvbench::markdown_printer printer{std::cout}; printer.print_device_info(); printer.print_benchmark_list(bench_mgr.get_benchmarks()); } @@ -624,10 +635,7 @@ void option_parser::print_help() const fmt::print("{}\n{}\n", ::cli_help_text, ::cli_help_axis_text); } -void option_parser::print_help_axis() const -{ - fmt::print("{}\n", ::cli_help_axis_text); -} +void option_parser::print_help_axis() const { fmt::print("{}\n", ::cli_help_axis_text); } void option_parser::set_persistence_mode(const std::string &state) try @@ -685,9 +693,7 @@ try { if (rate_val == nvbench::device_info::clock_rate::none) { - fmt::print("Unlocking clocks for device '{}' ({}).\n", - device.get_name(), - device.get_id()); + fmt::print("Unlocking clocks for device '{}' ({}).\n", device.get_name(), device.get_id()); } else { @@ -721,6 +727,20 @@ void option_parser::enable_run_once() bench.set_run_once(true); } +void option_parser::set_stopping_criterion(const std::string &criterion) +{ + // If no active benchmark, save args as global. + if (m_benchmarks.empty()) + { + m_global_benchmark_args.push_back("--stopping-criterion"); + m_global_benchmark_args.push_back(criterion); + return; + } + + benchmark_base &bench = *m_benchmarks.back(); + bench.set_stopping_criterion(criterion); +} + void option_parser::disable_blocking_kernel() { // If no active benchmark, save args as global. @@ -749,7 +769,7 @@ try catch (std::invalid_argument &) {} - m_benchmarks.push_back(idx >= 0 ? mgr.get_benchmark(idx).clone() + m_benchmarks.push_back(idx >= 0 ? mgr.get_benchmark(static_cast(idx)).clone() : mgr.get_benchmark(name).clone()); // Initialize the new benchmark with any global arguments: @@ -757,16 +777,12 @@ try } catch (std::exception &e) { - NVBENCH_THROW(std::runtime_error, - "Error handling option --benchmark `{}`:\n{}", - name, - e.what()); + NVBENCH_THROW(std::runtime_error, "Error handling option --benchmark `{}`:\n{}", name, e.what()); } void option_parser::replay_global_args() { - this->parse_range(m_global_benchmark_args.cbegin(), - m_global_benchmark_args.cend()); + this->parse_range(m_global_benchmark_args.cbegin(), m_global_benchmark_args.cend()); } void option_parser::update_devices(const std::string &devices) @@ -790,10 +806,7 @@ try } catch (std::exception &e) { - NVBENCH_THROW(std::runtime_error, - "Error handling option --devices `{}`:\n{}", - devices, - e.what()); + NVBENCH_THROW(std::runtime_error, "Error handling option --devices `{}`:\n{}", devices, e.what()); } void option_parser::update_axis(const std::string &spec) @@ -832,28 +845,20 @@ try switch (axis.get_type()) { case axis_type::type: - this->update_type_axis(static_cast(axis), - values, - flags); + this->update_type_axis(static_cast(axis), values, flags); break; case axis_type::int64: - this->update_int64_axis(static_cast(axis), - values, - flags); + this->update_int64_axis(static_cast(axis), values, flags); break; case axis_type::float64: - this->update_float64_axis(static_cast(axis), - values, - flags); + this->update_float64_axis(static_cast(axis), values, flags); break; case axis_type::string: - this->update_string_axis(static_cast(axis), - values, - flags); + this->update_string_axis(static_cast(axis), values, flags); break; @@ -866,10 +871,7 @@ try } catch (std::exception &e) { - NVBENCH_THROW(std::runtime_error, - "Error handling option --axis `{}`:\n{}", - spec, - e.what()); + NVBENCH_THROW(std::runtime_error, "Error handling option --axis `{}`:\n{}", spec, e.what()); } void option_parser::update_int64_axis(int64_axis &axis, @@ -888,9 +890,7 @@ void option_parser::update_int64_axis(int64_axis &axis, } else { - NVBENCH_THROW(std::runtime_error, - "Invalid flag for int64 axis: `{}`", - flag_spec); + NVBENCH_THROW(std::runtime_error, "Invalid flag for int64 axis: `{}`", flag_spec); } auto input_values = parse_values(value_spec); @@ -905,9 +905,7 @@ void option_parser::update_float64_axis(float64_axis &axis, // Validate flags: if (!flag_spec.empty()) { - NVBENCH_THROW(std::runtime_error, - "Invalid flag for float64 axis: `{}`", - flag_spec); + NVBENCH_THROW(std::runtime_error, "Invalid flag for float64 axis: `{}`", flag_spec); } auto input_values = parse_values(value_spec); @@ -922,9 +920,7 @@ void option_parser::update_string_axis(string_axis &axis, // Validate flags: if (!flag_spec.empty()) { - NVBENCH_THROW(std::runtime_error, - "Invalid flag for string axis: `{}`", - flag_spec); + NVBENCH_THROW(std::runtime_error, "Invalid flag for string axis: `{}`", flag_spec); } auto input_values = parse_values(value_spec); @@ -939,9 +935,7 @@ void option_parser::update_type_axis(type_axis &axis, // Validate flags: if (!flag_spec.empty()) { - NVBENCH_THROW(std::runtime_error, - "Invalid flag for type axis: `{}`", - flag_spec); + NVBENCH_THROW(std::runtime_error, "Invalid flag for type axis: `{}`", flag_spec); } auto input_values = parse_values(value_spec); @@ -949,8 +943,7 @@ void option_parser::update_type_axis(type_axis &axis, axis.set_active_inputs(input_values); } -void option_parser::update_int64_prop(const std::string &prop_arg, - const std::string &prop_val) +void option_parser::update_int64_prop(const std::string &prop_arg, const std::string &prop_val) try { // If no active benchmark, save args as global. @@ -983,9 +976,11 @@ catch (std::exception &e) e.what()); } -void option_parser::update_float64_prop(const std::string &prop_arg, - const std::string &prop_val) -try +void option_parser::update_criterion_prop( + const std::string &prop_arg, + const std::string &prop_val, + const nvbench::named_values::type type) +try { // If no active benchmark, save args as global. if (m_benchmarks.empty()) @@ -996,18 +991,59 @@ try } benchmark_base &bench = *m_benchmarks.back(); + nvbench::criterion_params& criterion_params = bench.get_criterion_params(); + std::string name(prop_arg.begin() + 2, prop_arg.end()); + if (type == nvbench::named_values::type::float64) + { + nvbench::float64_t value{}; + ::parse(prop_val, value); - nvbench::float64_t value{}; - ::parse(prop_val, value); - if (prop_arg == "--min-time") + if (prop_arg == "--max-noise") + { // Specified as percentage, stored as ratio: + value /= 100.0; + } + criterion_params.set_float64(name, value); + } + else if (type == nvbench::named_values::type::int64) + { + nvbench::int64_t value{}; + ::parse(prop_val, value); + criterion_params.set_int64(name, value); + } + else if (type == nvbench::named_values::type::string) { - bench.set_min_time(value); + criterion_params.set_string(name, prop_val); } - else if (prop_arg == "--max-noise") - { // Specified as percentage, stored as ratio: - bench.set_max_noise(value / 100.); + else + { + NVBENCH_THROW(std::runtime_error, "Unrecognized property: `{}`", prop_arg); } - else if (prop_arg == "--skip-time") +} +catch (std::exception& e) +{ + NVBENCH_THROW(std::runtime_error, + "Error handling option `{} {}`:\n{}", + prop_arg, + prop_val, + e.what()); +} + +void option_parser::update_float64_prop(const std::string &prop_arg, const std::string &prop_val) +try +{ + // If no active benchmark, save args as global. + if (m_benchmarks.empty()) + { + m_global_benchmark_args.push_back(prop_arg); + m_global_benchmark_args.push_back(prop_val); + return; + } + + benchmark_base &bench = *m_benchmarks.back(); + + nvbench::float64_t value{}; + ::parse(prop_val, value); + if (prop_arg == "--skip-time") { bench.set_skip_time(value); } diff --git a/nvbench/option_parser.cuh b/nvbench/option_parser.cuh index e35d7241..5bd834c3 100644 --- a/nvbench/option_parser.cuh +++ b/nvbench/option_parser.cuh @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -41,8 +42,7 @@ struct type_axis; */ struct option_parser { - using benchmark_vector = - std::vector>; + using benchmark_vector = std::vector>; option_parser(); ~option_parser(); @@ -51,15 +51,9 @@ struct option_parser void parse(std::vector args); [[nodiscard]] benchmark_vector &get_benchmarks() { return m_benchmarks; }; - [[nodiscard]] const benchmark_vector &get_benchmarks() const - { - return m_benchmarks; - }; + [[nodiscard]] const benchmark_vector &get_benchmarks() const { return m_benchmarks; }; - [[nodiscard]] const std::vector &get_args() const - { - return m_args; - } + [[nodiscard]] const std::vector &get_args() const { return m_args; } /*! * Returns the output format requested by the parse options. @@ -86,13 +80,14 @@ private: std::ostream &printer_spec_to_ostream(const std::string &spec); void print_version() const; - void print_list() const; + void print_list(printer_base& printer) const; void print_help() const; void print_help_axis() const; void set_persistence_mode(const std::string &state); void lock_gpu_clocks(const std::string &rate); + void set_stopping_criterion(const std::string &criterion); void enable_run_once(); void disable_blocking_kernel(); @@ -115,10 +110,12 @@ private: std::string_view value_spec, std::string_view flag_spec); - void update_int64_prop(const std::string &prop_arg, - const std::string &prop_val); - void update_float64_prop(const std::string &prop_arg, - const std::string &prop_val); + void update_int64_prop(const std::string &prop_arg, const std::string &prop_val); + void update_float64_prop(const std::string &prop_arg, const std::string &prop_val); + + void update_criterion_prop(const std::string &prop_arg, + const std::string &prop_val, + const nvbench::named_values::type type); void update_used_device_state() const; diff --git a/nvbench/printer_base.cuh b/nvbench/printer_base.cuh index 0e28a352..13cf803b 100644 --- a/nvbench/printer_base.cuh +++ b/nvbench/printer_base.cuh @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -76,19 +77,16 @@ struct printer_base virtual ~printer_base(); // move-only - printer_base(const printer_base &) = delete; - printer_base(printer_base &&) = default; + printer_base(const printer_base &) = delete; + printer_base(printer_base &&) = default; printer_base &operator=(const printer_base &) = delete; - printer_base &operator=(printer_base &&) = default; + printer_base &operator=(printer_base &&) = delete; /*! * Called once with the command line arguments used to invoke the current * executable. */ - void log_argv(const std::vector &argv) - { - this->do_log_argv(argv); - } + void log_argv(const std::vector &argv) { this->do_log_argv(argv); } /*! * Print a summary of all detected devices, if supported. @@ -108,19 +106,13 @@ struct printer_base /*! * Print a log message at the specified log level. */ - void log(nvbench::log_level level, const std::string &msg) - { - this->do_log(level, msg); - } + void log(nvbench::log_level level, const std::string &msg) { this->do_log(level, msg); } /*! * Called before running the measurements associated with state. * Implementations are expected to call `log(log_level::run, ...)`. */ - void log_run_state(const nvbench::state &exec_state) - { - this->do_log_run_state(exec_state); - } + void log_run_state(const nvbench::state &exec_state) { this->do_log_run_state(exec_state); } /*! * Measurements may call this to allow a printer to perform extra processing @@ -181,10 +173,7 @@ struct printer_base return this->do_get_completed_state_count(); } - virtual void set_total_state_count(std::size_t states) - { - this->do_set_total_state_count(states); - } + virtual void set_total_state_count(std::size_t states) { this->do_set_total_state_count(states); } [[nodiscard]] virtual std::size_t get_total_state_count() const { return this->do_get_total_state_count(); @@ -193,18 +182,22 @@ struct printer_base protected: // Implementation hooks for subclasses: - virtual void do_log_argv(const std::vector&) {} + virtual void do_log_argv(const std::vector &) {} virtual void do_print_device_info() {} virtual void do_print_log_preamble() {} virtual void do_print_log_epilogue() {} virtual void do_log(nvbench::log_level, const std::string &) {} virtual void do_log_run_state(const nvbench::state &) {} - virtual void - do_process_bulk_data_float64(nvbench::state &, - const std::string &, - const std::string &, - const std::vector &){}; - virtual void do_print_benchmark_list(const benchmark_vector &) {} + virtual void do_process_bulk_data_float64(nvbench::state &, + const std::string &, + const std::string &, + const std::vector &){}; + + virtual void do_print_benchmark_list(const benchmark_vector &) + { + throw std::runtime_error{"nvbench::do_print_benchmark_list is not supported by this printer."}; + } + virtual void do_print_benchmark_results(const benchmark_vector &) {} virtual void do_set_completed_state_count(std::size_t states); diff --git a/nvbench/printer_base.cxx b/nvbench/printer_base.cxx index 66de7959..639edc20 100644 --- a/nvbench/printer_base.cxx +++ b/nvbench/printer_base.cxx @@ -38,19 +38,10 @@ void printer_base::do_set_completed_state_count(std::size_t states) void printer_base::do_add_completed_state() { ++m_completed_state_count; } -std::size_t printer_base::do_get_completed_state_count() const -{ - return m_completed_state_count; -} +std::size_t printer_base::do_get_completed_state_count() const { return m_completed_state_count; } -void printer_base::do_set_total_state_count(std::size_t states) -{ - m_total_state_count = states; -} +void printer_base::do_set_total_state_count(std::size_t states) { m_total_state_count = states; } -std::size_t printer_base::do_get_total_state_count() const -{ - return m_total_state_count; -} +std::size_t printer_base::do_get_total_state_count() const { return m_total_state_count; } } // namespace nvbench diff --git a/nvbench/printer_multiplex.cuh b/nvbench/printer_multiplex.cuh index f32a0e9b..797b480c 100644 --- a/nvbench/printer_multiplex.cuh +++ b/nvbench/printer_multiplex.cuh @@ -40,10 +40,7 @@ struct printer_multiplex : nvbench::printer_base return static_cast(*m_printers.back()); } - [[nodiscard]] std::size_t get_printer_count() const - { - return m_printers.size(); - } + [[nodiscard]] std::size_t get_printer_count() const { return m_printers.size(); } protected: void do_log_argv(const std::vector &argv) override; @@ -52,11 +49,10 @@ protected: void do_print_log_epilogue() override; void do_log(nvbench::log_level, const std::string &) override; void do_log_run_state(const nvbench::state &) override; - void do_process_bulk_data_float64( - nvbench::state &, - const std::string &, - const std::string &, - const std::vector &) override; + void do_process_bulk_data_float64(nvbench::state &, + const std::string &, + const std::string &, + const std::vector &) override; void do_print_benchmark_list(const benchmark_vector &benches) override; void do_print_benchmark_results(const benchmark_vector &benches) override; void do_set_completed_state_count(std::size_t states) override; diff --git a/nvbench/printer_multiplex.cxx b/nvbench/printer_multiplex.cxx index 86d99544..89867c12 100644 --- a/nvbench/printer_multiplex.cxx +++ b/nvbench/printer_multiplex.cxx @@ -67,11 +67,10 @@ void printer_multiplex::do_log_run_state(const nvbench::state &exec_state) } } -void printer_multiplex::do_process_bulk_data_float64( - state &state, - const std::string &tag, - const std::string &hint, - const std::vector &data) +void printer_multiplex::do_process_bulk_data_float64(state &state, + const std::string &tag, + const std::string &hint, + const std::vector &data) { for (auto &format_ptr : m_printers) { @@ -87,8 +86,7 @@ void printer_multiplex::do_print_benchmark_list(const benchmark_vector &benches) } } -void printer_multiplex::do_print_benchmark_results( - const benchmark_vector &benches) +void printer_multiplex::do_print_benchmark_results(const benchmark_vector &benches) { for (auto &format_ptr : m_printers) { diff --git a/nvbench/range.cuh b/nvbench/range.cuh index f0e82550..7000f872 100644 --- a/nvbench/range.cuh +++ b/nvbench/range.cuh @@ -29,13 +29,11 @@ namespace nvbench namespace detail { template -using range_output_t = std::conditional_t, - nvbench::float64_t, - nvbench::int64_t>; +using range_output_t = + std::conditional_t, nvbench::float64_t, nvbench::int64_t>; } -template > +template > auto range(InT start, InT end, InT stride = InT{1}) { if constexpr (std::is_floating_point_v) diff --git a/nvbench/runner.cuh b/nvbench/runner.cuh index 9435906d..f32b2223 100644 --- a/nvbench/runner.cuh +++ b/nvbench/runner.cuh @@ -37,8 +37,7 @@ struct runner_base void generate_states(); - void handle_sampling_exception(const std::exception &e, - nvbench::state &exec_state) const; + void handle_sampling_exception(const std::exception &e, nvbench::state &exec_state) const; void run_state_prologue(state &exec_state) const; void run_state_epilogue(state &exec_state) const; @@ -51,11 +50,10 @@ struct runner_base template struct runner : public runner_base { - using benchmark_type = BenchmarkType; - using kernel_generator = typename benchmark_type::kernel_generator; - using type_configs = typename benchmark_type::type_configs; - static constexpr std::size_t num_type_configs = - benchmark_type::num_type_configs; + using benchmark_type = BenchmarkType; + using kernel_generator = typename benchmark_type::kernel_generator; + using type_configs = typename benchmark_type::type_configs; + static constexpr std::size_t num_type_configs = benchmark_type::num_type_configs; explicit runner(benchmark_type &bench) : runner_base{bench} @@ -86,38 +84,37 @@ private: // Iterate through type_configs: std::size_t type_config_index = 0; - nvbench::tl::foreach([&self = *this, - &states = m_benchmark.m_states, - &type_config_index, - &device](auto type_config_wrapper) { - // Get current type_config: - using type_config = typename decltype(type_config_wrapper)::type; - - // Find states with the current device / type_config - for (nvbench::state &cur_state : states) - { - if (cur_state.get_device() == device && - cur_state.get_type_config_index() == type_config_index) + nvbench::tl::foreach( + [&self = *this, &states = m_benchmark.m_states, &type_config_index, &device]( + auto type_config_wrapper) { + // Get current type_config: + using type_config = typename decltype(type_config_wrapper)::type; + + // Find states with the current device / type_config + for (nvbench::state &cur_state : states) { - self.run_state_prologue(cur_state); - try + if (cur_state.get_device() == device && + cur_state.get_type_config_index() == type_config_index) { - kernel_generator{}(cur_state, type_config{}); - if (cur_state.is_skipped()) + self.run_state_prologue(cur_state); + try { - self.print_skip_notification(cur_state); + kernel_generator{}(cur_state, type_config{}); + if (cur_state.is_skipped()) + { + self.print_skip_notification(cur_state); + } } + catch (std::exception &e) + { + self.handle_sampling_exception(e, cur_state); + } + self.run_state_epilogue(cur_state); } - catch (std::exception &e) - { - self.handle_sampling_exception(e, cur_state); - } - self.run_state_epilogue(cur_state); } - } - ++type_config_index; - }); + ++type_config_index; + }); } }; diff --git a/nvbench/runner.cxx b/nvbench/runner.cxx index 3aba964d..93cedf57 100644 --- a/nvbench/runner.cxx +++ b/nvbench/runner.cxx @@ -35,8 +35,7 @@ void runner_base::generate_states() m_benchmark.m_states = nvbench::detail::state_generator::create(m_benchmark); } -void runner_base::handle_sampling_exception(const std::exception &e, - state &exec_state) const +void runner_base::handle_sampling_exception(const std::exception &e, state &exec_state) const { // If the state is skipped, that means the execution framework class handled // the error already. @@ -62,8 +61,7 @@ void runner_base::handle_sampling_exception(const std::exception &e, void runner_base::run_state_prologue(nvbench::state &exec_state) const { // Log if a printer exists: - if (auto printer_opt_ref = exec_state.get_benchmark().get_printer(); - printer_opt_ref.has_value()) + if (auto printer_opt_ref = exec_state.get_benchmark().get_printer(); printer_opt_ref.has_value()) { auto &printer = printer_opt_ref.value().get(); printer.log_run_state(exec_state); @@ -73,19 +71,16 @@ void runner_base::run_state_prologue(nvbench::state &exec_state) const void runner_base::run_state_epilogue(state &exec_state) const { // Notify the printer that the state has completed:: - if (auto printer_opt_ref = exec_state.get_benchmark().get_printer(); - printer_opt_ref.has_value()) + if (auto printer_opt_ref = exec_state.get_benchmark().get_printer(); printer_opt_ref.has_value()) { auto &printer = printer_opt_ref.value().get(); printer.add_completed_state(); } } - void runner_base::print_skip_notification(state &exec_state) const { - if (auto printer_opt_ref = exec_state.get_benchmark().get_printer(); - printer_opt_ref.has_value()) + if (auto printer_opt_ref = exec_state.get_benchmark().get_printer(); printer_opt_ref.has_value()) { auto &printer = printer_opt_ref.value().get(); printer.log(nvbench::log_level::skip, exec_state.get_skip_reason()); diff --git a/nvbench/state.cuh b/nvbench/state.cuh index 336ba2ba..09795de3 100644 --- a/nvbench/state.cuh +++ b/nvbench/state.cuh @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -58,106 +59,79 @@ struct state_tester; struct state { // move-only - state(const state &) = delete; - state(state &&) = default; + state(const state &) = delete; + state(state &&) = default; state &operator=(const state &) = delete; - state &operator=(state &&) = default; + state &operator=(state &&) = default; - [[nodiscard]] const nvbench::cuda_stream &get_cuda_stream() const - { - return m_cuda_stream; - } - void set_cuda_stream(nvbench::cuda_stream &&stream) - { - m_cuda_stream = std::move(stream); - } + [[nodiscard]] const nvbench::cuda_stream &get_cuda_stream() const { return m_cuda_stream; } + void set_cuda_stream(nvbench::cuda_stream &&stream) { m_cuda_stream = std::move(stream); } /// The CUDA device associated with with this benchmark state. May be /// nullopt for CPU-only benchmarks. - [[nodiscard]] const std::optional &get_device() const - { - return m_device; - } + [[nodiscard]] const std::optional &get_device() const { return m_device; } /// An index into a benchmark::type_configs type_list. Returns 0 if no type /// axes in the associated benchmark. - [[nodiscard]] std::size_t get_type_config_index() const - { - return m_type_config_index; - } + [[nodiscard]] std::size_t get_type_config_index() const { return m_type_config_index; } [[nodiscard]] nvbench::int64_t get_int64(const std::string &axis_name) const; - [[nodiscard]] nvbench::int64_t - get_int64_or_default(const std::string &axis_name, - nvbench::int64_t default_value) const; - - [[nodiscard]] nvbench::float64_t - get_float64(const std::string &axis_name) const; - [[nodiscard]] nvbench::float64_t - get_float64_or_default(const std::string &axis_name, - nvbench::float64_t default_value) const; - - [[nodiscard]] const std::string & - get_string(const std::string &axis_name) const; - [[nodiscard]] const std::string & - get_string_or_default(const std::string &axis_name, - const std::string &default_value) const; + [[nodiscard]] nvbench::int64_t get_int64_or_default(const std::string &axis_name, + nvbench::int64_t default_value) const; + + [[nodiscard]] nvbench::float64_t get_float64(const std::string &axis_name) const; + [[nodiscard]] nvbench::float64_t get_float64_or_default(const std::string &axis_name, + nvbench::float64_t default_value) const; + + [[nodiscard]] const std::string &get_string(const std::string &axis_name) const; + [[nodiscard]] const std::string &get_string_or_default(const std::string &axis_name, + const std::string &default_value) const; void add_element_count(std::size_t elements, std::string column_name = {}); void set_element_count(std::size_t elements) { m_element_count = elements; } - [[nodiscard]] std::size_t get_element_count() const - { - return m_element_count; - } + [[nodiscard]] std::size_t get_element_count() const { return m_element_count; } template void add_global_memory_reads(std::size_t count, std::string column_name = {}) { - this->add_global_memory_reads(count * sizeof(ElementType), - std::move(column_name)); + this->add_global_memory_reads(count * sizeof(ElementType), std::move(column_name)); } void add_global_memory_reads(std::size_t bytes, std::string column_name = {}); template void add_global_memory_writes(std::size_t count, std::string column_name = {}) { - this->add_global_memory_writes(count * sizeof(ElementType), - std::move(column_name)); + this->add_global_memory_writes(count * sizeof(ElementType), std::move(column_name)); } - void add_global_memory_writes(std::size_t bytes, - std::string column_name = {}); + void add_global_memory_writes(std::size_t bytes, std::string column_name = {}); void add_buffer_size(std::size_t num_bytes, std::string summary_tag, std::string column_name = {}, std::string description = {}); - void set_global_memory_rw_bytes(std::size_t bytes) - { - m_global_memory_rw_bytes = bytes; - } - [[nodiscard]] std::size_t get_global_memory_rw_bytes() const - { - return m_global_memory_rw_bytes; - } + void set_global_memory_rw_bytes(std::size_t bytes) { m_global_memory_rw_bytes = bytes; } + [[nodiscard]] std::size_t get_global_memory_rw_bytes() const { return m_global_memory_rw_bytes; } void skip(std::string reason) { m_skip_reason = std::move(reason); } [[nodiscard]] bool is_skipped() const { return !m_skip_reason.empty(); } - [[nodiscard]] const std::string &get_skip_reason() const - { - return m_skip_reason; - } + [[nodiscard]] const std::string &get_skip_reason() const { return m_skip_reason; } /// Execute at least this many trials per measurement. @{ - [[nodiscard]] nvbench::int64_t get_min_samples() const - { - return m_min_samples; - } - void set_min_samples(nvbench::int64_t min_samples) + [[nodiscard]] nvbench::int64_t get_min_samples() const { return m_min_samples; } + void set_min_samples(nvbench::int64_t min_samples) { m_min_samples = min_samples; } + /// @} + + [[nodiscard]] const nvbench::criterion_params &get_criterion_params() const { - m_min_samples = min_samples; + return m_criterion_params; } + + /// Control the stopping criterion for the measurement loop. + /// @{ + [[nodiscard]] const std::string& get_stopping_criterion() const { return m_stopping_criterion; } + void set_stopping_criterion(std::string criterion) { m_stopping_criterion = std::move(criterion); } /// @} /// If true, the benchmark is only run once, skipping all warmup runs and only @@ -173,16 +147,30 @@ struct state void set_disable_blocking_kernel(bool v) { m_disable_blocking_kernel = v; } /// @} - /// Accumulate at least this many seconds of timing data per measurement. @{ - [[nodiscard]] nvbench::float64_t get_min_time() const { return m_min_time; } - void set_min_time(nvbench::float64_t min_time) { m_min_time = min_time; } + /// Accumulate at least this many seconds of timing data per measurement. + /// Only applies to `stdrel` stopping criterion. @{ + [[nodiscard]] nvbench::float64_t get_min_time() const + { + return m_criterion_params.get_float64("min-time"); + } + void set_min_time(nvbench::float64_t min_time) + { + m_criterion_params.set_float64("min-time", min_time); + } /// @} /// Specify the maximum amount of noise if a measurement supports noise. /// Noise is the relative standard deviation: - /// `noise = stdev / mean_time`. @{ - [[nodiscard]] nvbench::float64_t get_max_noise() const { return m_max_noise; } - void set_max_noise(nvbench::float64_t max_noise) { m_max_noise = max_noise; } + /// `noise = stdev / mean_time`. + /// Only applies to `stdrel` stopping criterion. @{ + [[nodiscard]] nvbench::float64_t get_max_noise() const + { + return m_criterion_params.get_float64("max-noise"); + } + void set_max_noise(nvbench::float64_t max_noise) + { + m_criterion_params.set_float64("max-noise", max_noise); + } /// @} /// If a warmup run finishes in less than `skip_time`, the measurement will @@ -222,20 +210,14 @@ struct state } ///@} - [[nodiscard]] const named_values &get_axis_values() const - { - return m_axis_values; - } + [[nodiscard]] const named_values &get_axis_values() const { return m_axis_values; } /*! * Return a string of "axis_name1=input_string1 axis_name2=input_string2 ..." */ [[nodiscard]] std::string get_axis_values_as_string(bool color = false) const; - [[nodiscard]] const benchmark_base &get_benchmark() const - { - return m_benchmark; - } + [[nodiscard]] const benchmark_base &get_benchmark() const { return m_benchmark; } void collect_l1_hit_rates() { m_collect_l1_hit_rates = true; } void collect_l2_hit_rates() { m_collect_l2_hit_rates = true; } @@ -252,26 +234,11 @@ struct state collect_dram_throughput(); } - [[nodiscard]] bool is_l1_hit_rate_collected() const - { - return m_collect_l1_hit_rates; - } - [[nodiscard]] bool is_l2_hit_rate_collected() const - { - return m_collect_l2_hit_rates; - } - [[nodiscard]] bool is_stores_efficiency_collected() const - { - return m_collect_stores_efficiency; - } - [[nodiscard]] bool is_loads_efficiency_collected() const - { - return m_collect_loads_efficiency; - } - [[nodiscard]] bool is_dram_throughput_collected() const - { - return m_collect_dram_throughput; - } + [[nodiscard]] bool is_l1_hit_rate_collected() const { return m_collect_l1_hit_rates; } + [[nodiscard]] bool is_l2_hit_rate_collected() const { return m_collect_l2_hit_rates; } + [[nodiscard]] bool is_stores_efficiency_collected() const { return m_collect_stores_efficiency; } + [[nodiscard]] bool is_loads_efficiency_collected() const { return m_collect_loads_efficiency; } + [[nodiscard]] bool is_dram_throughput_collected() const { return m_collect_dram_throughput; } [[nodiscard]] bool is_cupti_required() const { @@ -306,8 +273,7 @@ struct state template void exec(KernelLauncher &&kernel_launcher) { - this->exec(nvbench::exec_tag::none, - std::forward(kernel_launcher)); + this->exec(nvbench::exec_tag::none, std::forward(kernel_launcher)); } private: @@ -330,9 +296,11 @@ private: bool m_run_once{false}; bool m_disable_blocking_kernel{false}; + + nvbench::criterion_params m_criterion_params; + std::string m_stopping_criterion; + nvbench::int64_t m_min_samples; - nvbench::float64_t m_min_time; - nvbench::float64_t m_max_noise; nvbench::float64_t m_skip_time; nvbench::float64_t m_timeout; diff --git a/nvbench/state.cxx b/nvbench/state.cxx index 0774faa7..1be48c58 100644 --- a/nvbench/state.cxx +++ b/nvbench/state.cxx @@ -36,9 +36,9 @@ state::state(const benchmark_base &bench) : m_benchmark{bench} , m_run_once{bench.get_run_once()} , m_disable_blocking_kernel{bench.get_disable_blocking_kernel()} + , m_criterion_params{bench.get_criterion_params()} + , m_stopping_criterion(bench.get_stopping_criterion()) , m_min_samples{bench.get_min_samples()} - , m_min_time{bench.get_min_time()} - , m_max_noise{bench.get_max_noise()} , m_skip_time{bench.get_skip_time()} , m_timeout{bench.get_timeout()} {} @@ -53,9 +53,9 @@ state::state(const benchmark_base &bench, , m_type_config_index{type_config_index} , m_run_once{bench.get_run_once()} , m_disable_blocking_kernel{bench.get_disable_blocking_kernel()} + , m_criterion_params{bench.get_criterion_params()} + , m_stopping_criterion(bench.get_stopping_criterion()) , m_min_samples{bench.get_min_samples()} - , m_min_time{bench.get_min_time()} - , m_max_noise{bench.get_max_noise()} , m_skip_time{bench.get_skip_time()} , m_timeout{bench.get_timeout()} {} @@ -65,9 +65,8 @@ nvbench::int64_t state::get_int64(const std::string &axis_name) const return m_axis_values.get_int64(axis_name); } -nvbench::int64_t -state::get_int64_or_default(const std::string &axis_name, - nvbench::int64_t default_value) const +nvbench::int64_t state::get_int64_or_default(const std::string &axis_name, + nvbench::int64_t default_value) const try { return this->get_int64(axis_name); @@ -82,9 +81,8 @@ nvbench::float64_t state::get_float64(const std::string &axis_name) const return m_axis_values.get_float64(axis_name); } -nvbench::float64_t -state::get_float64_or_default(const std::string &axis_name, - nvbench::float64_t default_value) const +nvbench::float64_t state::get_float64_or_default(const std::string &axis_name, + nvbench::float64_t default_value) const try { return this->get_float64(axis_name); @@ -99,9 +97,8 @@ const std::string &state::get_string(const std::string &axis_name) const return m_axis_values.get_string(axis_name); } -const std::string & -state::get_string_or_default(const std::string &axis_name, - const std::string &default_value) const +const std::string &state::get_string_or_default(const std::string &axis_name, + const std::string &default_value) const try { return this->get_string(axis_name); @@ -125,20 +122,18 @@ summary &state::add_summary(summary s) const summary &state::get_summary(std::string_view tag) const { // Check tags first - auto iter = - std::find_if(m_summaries.cbegin(), - m_summaries.cend(), - [&tag](const auto &s) { return s.get_tag() == tag; }); + auto iter = std::find_if(m_summaries.cbegin(), m_summaries.cend(), [&tag](const auto &s) { + return s.get_tag() == tag; + }); if (iter != m_summaries.cend()) { return *iter; } // Then names: - iter = - std::find_if(m_summaries.cbegin(), - m_summaries.cend(), - [&tag](const auto &s) { return s.get_string("name") == tag; }); + iter = std::find_if(m_summaries.cbegin(), m_summaries.cend(), [&tag](const auto &s) { + return s.get_string("name") == tag; + }); if (iter != m_summaries.cend()) { return *iter; @@ -150,20 +145,18 @@ const summary &state::get_summary(std::string_view tag) const summary &state::get_summary(std::string_view tag) { // Check tags first - auto iter = - std::find_if(m_summaries.begin(), m_summaries.end(), [&tag](const auto &s) { - return s.get_tag() == tag; - }); + auto iter = std::find_if(m_summaries.begin(), m_summaries.end(), [&tag](const auto &s) { + return s.get_tag() == tag; + }); if (iter != m_summaries.end()) { return *iter; } // Then names: - iter = - std::find_if(m_summaries.begin(), m_summaries.end(), [&tag](const auto &s) { - return s.get_string("name") == tag; - }); + iter = std::find_if(m_summaries.begin(), m_summaries.end(), [&tag](const auto &s) { + return s.get_string("name") == tag; + }); if (iter != m_summaries.end()) { return *iter; @@ -187,18 +180,17 @@ std::string state::get_axis_values_as_string(bool color) const // Create a Key=Value list of all parameters: fmt::memory_buffer buffer; - auto append_key_value = [&buffer, &style](const std::string &key, - const auto &value, - std::string value_fmtstr = "{}") { - constexpr auto key_format = fmt::emphasis::italic; - constexpr auto value_format = fmt::emphasis::bold; - - fmt::format_to(buffer, - "{}{}={}", - buffer.size() == 0 ? "" : " ", - fmt::format(style(key_format), "{}", key), - fmt::format(style(value_format), value_fmtstr, value)); - }; + auto append_key_value = + [&buffer, &style](const std::string &key, const auto &value, std::string value_fmtstr = "{}") { + constexpr auto key_format = fmt::emphasis::italic; + constexpr auto value_format = fmt::emphasis::bold; + + fmt::format_to(std::back_inserter(buffer), + "{}{}={}", + buffer.size() == 0 ? "" : " ", + fmt::format(style(key_format), "{}", key), + fmt::format(style(value_format), value_fmtstr, value)); + }; if (m_device) { @@ -211,8 +203,7 @@ std::string state::get_axis_values_as_string(bool color) const const auto axis_type = m_axis_values.get_type(name); // Handle power-of-two int64 axes differently: - if (axis_type == named_values::type::int64 && - axes.get_int64_axis(name).is_power_of_two()) + if (axis_type == named_values::type::int64 && axes.get_int64_axis(name).is_power_of_two()) { const nvbench::int64_t value = m_axis_values.get_int64(name); const nvbench::int64_t exponent = int64_axis::compute_log2(value); @@ -242,10 +233,9 @@ std::string state::get_short_description(bool color) const return color ? fmt_style : no_style; }; - return fmt::format( - "{} [{}]", - fmt::format(style(fmt::emphasis::bold), "{}", m_benchmark.get().get_name()), - this->get_axis_values_as_string(color)); + return fmt::format("{} [{}]", + fmt::format(style(fmt::emphasis::bold), "{}", m_benchmark.get().get_name()), + this->get_axis_values_as_string(color)); } void state::add_element_count(std::size_t elements, std::string column_name) diff --git a/nvbench/stopping_criterion.cuh b/nvbench/stopping_criterion.cuh new file mode 100644 index 00000000..006a6994 --- /dev/null +++ b/nvbench/stopping_criterion.cuh @@ -0,0 +1,138 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include + +#include +#include + +namespace nvbench +{ + +namespace detail +{ + +constexpr nvbench::float64_t compat_min_time() { return 0.5; } // 0.5 seconds +constexpr nvbench::float64_t compat_max_noise() { return 0.005; } // 0.5% relative standard deviation + +} // namespace detail + +/** + * Stores all the parameters for stopping criterion in use + */ +class criterion_params +{ + nvbench::named_values m_named_values; +public: + criterion_params(); + criterion_params(std::initializer_list>); + + /** + * Set parameter values from another criterion_params object if they exist + * + * Parameters in `other` that do not correspond to parameters in `this` are ignored. + */ + void set_from(const criterion_params &other); + + void set_int64(std::string name, nvbench::int64_t value); + void set_float64(std::string name, nvbench::float64_t value); + void set_string(std::string name, std::string value); + + [[nodiscard]] std::vector get_names() const; + [[nodiscard]] nvbench::named_values::type get_type(const std::string &name) const; + + [[nodiscard]] bool has_value(const std::string &name) const; + [[nodiscard]] nvbench::int64_t get_int64(const std::string &name) const; + [[nodiscard]] nvbench::float64_t get_float64(const std::string &name) const; + [[nodiscard]] std::string get_string(const std::string &name) const; +}; + +/** + * Stopping criterion interface + */ +class stopping_criterion_base +{ +protected: + std::string m_name; + criterion_params m_params; + +public: + /** + * @param name Unique name of the criterion + * @param params Default values for all parameters of the criterion + */ + explicit stopping_criterion_base(std::string name, criterion_params params) + : m_name{std::move(name)} + , m_params{std::move(params)} + {} + + virtual ~stopping_criterion_base() = default; + + [[nodiscard]] const std::string &get_name() const { return m_name; } + [[nodiscard]] const criterion_params &get_params() const { return m_params; } + + /** + * Initialize the criterion with the given parameters + * + * This method is called once per benchmark run, before any measurements are provided. + */ + void initialize(const criterion_params ¶ms) + { + m_params.set_from(params); + this->do_initialize(); + } + + /** + * Add the latest measurement to the criterion + */ + void add_measurement(nvbench::float64_t measurement) + { + this->do_add_measurement(measurement); + } + + /** + * Check if the criterion has been met for all measurements processed by `add_measurement` + */ + bool is_finished() + { + return this->do_is_finished(); + } + +protected: + /** + * Initialize the criterion after updaring the parameters + */ + virtual void do_initialize() = 0; + + /** + * Add the latest measurement to the criterion + */ + virtual void do_add_measurement(nvbench::float64_t measurement) = 0; + + /** + * Check if the criterion has been met for all measurements processed by `add_measurement` + */ + virtual bool do_is_finished() = 0; +}; + +} // namespace nvbench diff --git a/nvbench/stopping_criterion.cxx b/nvbench/stopping_criterion.cxx new file mode 100644 index 00000000..976a1a71 --- /dev/null +++ b/nvbench/stopping_criterion.cxx @@ -0,0 +1,124 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + + +namespace nvbench +{ + +// Default constructor for compatibility with old code +criterion_params::criterion_params() + : criterion_params{{"max-noise", nvbench::detail::compat_max_noise()}, + {"min-time", nvbench::detail::compat_min_time()}} +{} + +criterion_params::criterion_params( + std::initializer_list> list) +{ + for (const auto &[name, value] : list) + { + m_named_values.set_value(name, value); + } +} + +void criterion_params::set_from(const criterion_params &other) +{ + for (const std::string &name : this->get_names()) + { + if (other.has_value(name)) + { + if (this->get_type(name) != other.get_type(name)) + { + NVBENCH_THROW(std::runtime_error, + "Mismatched types for named value \"{}\". " + "Expected {}, got {}.", + name, + static_cast(this->get_type(name)), + static_cast(other.get_type(name))); + } + m_named_values.remove_value(name); + m_named_values.set_value(name, other.m_named_values.get_value(name)); + } + } +} + +void criterion_params::set_int64(std::string name, nvbench::int64_t value) +{ + if (m_named_values.has_value(name)) + { + m_named_values.remove_value(name); + } + + m_named_values.set_int64(name, value); +} + +void criterion_params::set_float64(std::string name, nvbench::float64_t value) +{ + if (m_named_values.has_value(name)) + { + m_named_values.remove_value(name); + } + + m_named_values.set_float64(name, value); +} + +void criterion_params::set_string(std::string name, std::string value) +{ + if (m_named_values.has_value(name)) + { + m_named_values.remove_value(name); + } + + m_named_values.set_string(name, std::move(value)); +} + +bool criterion_params::has_value(const std::string &name) const +{ + return m_named_values.has_value(name); +} + +nvbench::int64_t criterion_params::get_int64(const std::string &name) const +{ + return m_named_values.get_int64(name); +} + +nvbench::float64_t criterion_params::get_float64(const std::string &name) const +{ + return m_named_values.get_float64(name); +} + +std::string criterion_params::get_string(const std::string &name) const +{ + return m_named_values.get_string(name); +} + +std::vector criterion_params::get_names() const +{ + return m_named_values.get_names(); +} + +nvbench::named_values::type criterion_params::get_type(const std::string &name) const +{ + return m_named_values.get_type(name); +} + + +} // namespace nvbench::detail diff --git a/nvbench/string_axis.cuh b/nvbench/string_axis.cuh index 2f526e7a..a8af16ef 100644 --- a/nvbench/string_axis.cuh +++ b/nvbench/string_axis.cuh @@ -36,25 +36,13 @@ struct string_axis final : public axis_base ~string_axis() final; - void set_inputs(std::vector inputs) - { - m_values = std::move(inputs); - } - [[nodiscard]] const std::string &get_value(std::size_t i) const - { - return m_values[i]; - } + void set_inputs(std::vector inputs) { m_values = std::move(inputs); } + [[nodiscard]] const std::string &get_value(std::size_t i) const { return m_values[i]; } private: - std::unique_ptr do_clone() const - { - return std::make_unique(*this); - } + std::unique_ptr do_clone() const final { return std::make_unique(*this); } std::size_t do_get_size() const final { return m_values.size(); } - std::string do_get_input_string(std::size_t i) const final - { - return m_values[i]; - } + std::string do_get_input_string(std::size_t i) const final { return m_values[i]; } std::string do_get_description(std::size_t) const final { return {}; } std::vector m_values; diff --git a/nvbench/summary.cuh b/nvbench/summary.cuh index 4576b15a..66093c05 100644 --- a/nvbench/summary.cuh +++ b/nvbench/summary.cuh @@ -92,10 +92,10 @@ struct summary : public nvbench::named_values {} // move-only - summary(const summary &) = delete; - summary(summary &&) = default; + summary(const summary &) = delete; + summary(summary &&) = default; summary &operator=(const summary &) = delete; - summary &operator=(summary &&) = default; + summary &operator=(summary &&) = default; void set_tag(std::string tag) { m_tag = std::move(tag); } [[nodiscard]] const std::string &get_tag() const { return m_tag; } diff --git a/nvbench/test_kernels.cuh b/nvbench/test_kernels.cuh index e08db315..f46216dc 100644 --- a/nvbench/test_kernels.cuh +++ b/nvbench/test_kernels.cuh @@ -18,6 +18,8 @@ #pragma once +#include + #include #include @@ -38,8 +40,8 @@ namespace nvbench __global__ void sleep_kernel(double seconds) { const auto start = cuda::std::chrono::high_resolution_clock::now(); - const auto ns = cuda::std::chrono::nanoseconds( - static_cast(seconds * 1000 * 1000 * 1000)); + const auto ns = + cuda::std::chrono::nanoseconds(static_cast(seconds * 1000 * 1000 * 1000)); const auto finish = start + ns; auto now = cuda::std::chrono::high_resolution_clock::now(); @@ -53,7 +55,7 @@ __global__ void sleep_kernel(double seconds) * Naive copy of `n` values from `in` -> `out`. */ template -__global__ void copy_kernel(const T* in, U* out, std::size_t n) +__global__ void copy_kernel(const T *in, U *out, std::size_t n) { const auto init = blockIdx.x * blockDim.x + threadIdx.x; const auto step = blockDim.x * gridDim.x; @@ -68,7 +70,7 @@ __global__ void copy_kernel(const T* in, U* out, std::size_t n) * For `i <- [0,n)`, `out[i] = in[i] % 2`. */ template -__global__ void mod2_kernel(const T* in, U* out, std::size_t n) +__global__ void mod2_kernel(const T *in, U *out, std::size_t n) { const auto init = blockIdx.x * blockDim.x + threadIdx.x; const auto step = blockDim.x * gridDim.x; @@ -79,4 +81,4 @@ __global__ void mod2_kernel(const T* in, U* out, std::size_t n) } } -} +} // namespace nvbench diff --git a/nvbench/type_axis.cuh b/nvbench/type_axis.cuh index 2ee91445..3a4c59a1 100644 --- a/nvbench/type_axis.cuh +++ b/nvbench/type_axis.cuh @@ -43,7 +43,7 @@ struct type_axis final : public axis_base template void set_inputs(); - void set_active_inputs(const std::vector& inputs); + void set_active_inputs(const std::vector &inputs); [[nodiscard]] bool get_is_active(const std::string &input) const; [[nodiscard]] bool get_is_active(std::size_t index) const; @@ -57,23 +57,13 @@ struct type_axis final : public axis_base /** * The index in this axis of the type with the specified `input_string`. */ - [[nodiscard]] std::size_t - get_type_index(const std::string &input_string) const; + [[nodiscard]] std::size_t get_type_index(const std::string &input_string) const; private: - std::unique_ptr do_clone() const - { - return std::make_unique(*this); - } + std::unique_ptr do_clone() const final { return std::make_unique(*this); } std::size_t do_get_size() const final { return m_input_strings.size(); } - std::string do_get_input_string(std::size_t i) const final - { - return m_input_strings[i]; - } - std::string do_get_description(std::size_t i) const final - { - return m_descriptions[i]; - } + std::string do_get_input_string(std::size_t i) const final { return m_input_strings[i]; } + std::string do_get_description(std::size_t i) const final { return m_descriptions[i]; } std::vector m_input_strings; std::vector m_descriptions; diff --git a/nvbench/type_axis.cxx b/nvbench/type_axis.cxx index af436ad1..f89ec1d5 100644 --- a/nvbench/type_axis.cxx +++ b/nvbench/type_axis.cxx @@ -35,10 +35,10 @@ void type_axis::set_active_inputs(const std::vector &inputs) { m_mask.clear(); m_mask.resize(m_input_strings.size(), false); - for (const auto& input : inputs) + for (const auto &input : inputs) { const auto idx = this->get_type_index(input); - m_mask[idx] = true; + m_mask[idx] = true; } } @@ -47,21 +47,16 @@ bool type_axis::get_is_active(const std::string &input) const return this->get_is_active(this->get_type_index(input)); } -bool type_axis::get_is_active(std::size_t idx) const -{ - return m_mask.at(idx); -} +bool type_axis::get_is_active(std::size_t idx) const { return m_mask.at(idx); } std::size_t type_axis::get_active_count() const { - return static_cast( - std::count(m_mask.cbegin(), m_mask.cend(), true)); + return static_cast(std::count(m_mask.cbegin(), m_mask.cend(), true)); } std::size_t type_axis::get_type_index(const std::string &input_string) const { - auto it = - std::find(m_input_strings.cbegin(), m_input_strings.cend(), input_string); + auto it = std::find(m_input_strings.cbegin(), m_input_strings.cend(), input_string); if (it == m_input_strings.end()) { NVBENCH_THROW(std::runtime_error, @@ -72,7 +67,7 @@ std::size_t type_axis::get_type_index(const std::string &input_string) const m_input_strings); } - return it - m_input_strings.cbegin(); + return static_cast(it - m_input_strings.cbegin()); } } // namespace nvbench diff --git a/nvbench/type_strings.cuh b/nvbench/type_strings.cuh index 287e0f93..b915854c 100644 --- a/nvbench/type_strings.cuh +++ b/nvbench/type_strings.cuh @@ -30,17 +30,17 @@ namespace nvbench std::string demangle(const std::string &str); template -std::string demangle() { return demangle(typeid(T).name()); } +std::string demangle() +{ + return demangle(typeid(T).name()); +} template struct type_strings { // The string used to identify the type in shorthand (e.g. output tables and // CLI options): - static std::string input_string() - { - return nvbench::demangle(); - } + static std::string input_string() { return nvbench::demangle(); } // A more descriptive identifier for the type, if input_string is not a common // identifier. May be blank if `input_string` is obvious. @@ -56,10 +56,7 @@ struct type_strings> // A more descriptive identifier for the type, if input_string is not a common // identifier. May be blank if `input_string` is obvious. - static std::string description() - { - return nvbench::demangle>(); - } + static std::string description() { return nvbench::demangle>(); } }; } // namespace nvbench @@ -67,15 +64,15 @@ struct type_strings> /*! * Declare an `input_string` and `description` to use with a specific `type`. */ -#define NVBENCH_DECLARE_TYPE_STRINGS(Type, InputString, Description) \ - namespace nvbench \ - { \ - template <> \ - struct type_strings \ - { \ - static std::string input_string() { return {InputString}; } \ - static std::string description() { return {Description}; } \ - }; \ +#define NVBENCH_DECLARE_TYPE_STRINGS(Type, InputString, Description) \ + namespace nvbench \ + { \ + template <> \ + struct type_strings \ + { \ + static std::string input_string() { return {InputString}; } \ + static std::string description() { return {Description}; } \ + }; \ } NVBENCH_DECLARE_TYPE_STRINGS(nvbench::int8_t, "I8", "int8_t"); diff --git a/scripts/nvbench_histogram.py b/scripts/nvbench_histogram.py old mode 100644 new mode 100755 index 1df17cc4..5c37d293 --- a/scripts/nvbench_histogram.py +++ b/scripts/nvbench_histogram.py @@ -38,6 +38,20 @@ def parse_files(): return filenames +def extract_filename(summary): + summary_data = summary["data"] + value_data = next(filter(lambda v: v["name"] == "filename", summary_data)) + assert(value_data["type"] == "string") + return value_data["value"] + + +def extract_size(summary): + summary_data = summary["data"] + value_data = next(filter(lambda v: v["name"] == "size", summary_data)) + assert(value_data["type"] == "int64") + return int(value_data["value"]) + + def parse_samples_meta(filename, state): summaries = state["summaries"] if not summaries: @@ -49,13 +63,13 @@ def parse_samples_meta(filename, state): if not summary: return None, None - sample_filename = summary["filename"]["value"] + sample_filename = extract_filename(summary) # If not absolute, the path is relative to the associated .json file: if not os.path.isabs(sample_filename): sample_filename = os.path.join(os.path.dirname(filename), sample_filename) - sample_count = int(summary["size"]["value"]) + sample_count = extract_size(summary) return sample_count, sample_filename diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index 4928ebc9..f4072586 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -4,22 +4,36 @@ set(test_srcs create.cu cuda_timer.cu cpu_timer.cu + criterion_manager.cu + criterion_params.cu + custom_main_custom_args.cu + custom_main_custom_exceptions.cu + custom_main_global_state_raii.cu enum_type_list.cu + entropy_criterion.cu float64_axis.cu int64_axis.cu named_values.cu option_parser.cu range.cu + reset_error.cu ring_buffer.cu runner.cu state.cu + statistics.cu state_generator.cu + stdrel_criterion.cu string_axis.cu type_axis.cu type_list.cu ) -# Metatarget for all examples: +# Custom arguments: +# CTest commands+args can't be modified after creation, so we need to rely on substitution. +set(NVBench_TEST_ARGS_nvbench.test.custom_main_custom_args "--quiet" "--my-custom-arg" "--run-once" "-d" "0") +set(NVBench_TEST_ARGS_nvbench.test.custom_main_custom_exceptions "--quiet" "--run-once" "-d" "0") + +# Metatarget for all tests: add_custom_target(nvbench.test.all) add_dependencies(nvbench.all nvbench.test.all) @@ -31,10 +45,14 @@ foreach(test_src IN LISTS test_srcs) target_link_libraries(${test_name} PRIVATE nvbench::nvbench fmt) set_target_properties(${test_name} PROPERTIES COMPILE_FEATURES cuda_std_17) nvbench_config_target(${test_name}) - add_test(NAME ${test_name} COMMAND "$") + add_test(NAME ${test_name} COMMAND "$" ${NVBench_TEST_ARGS_${test_name}}) add_dependencies(nvbench.test.all ${test_name}) endforeach() +set_tests_properties(nvbench.test.custom_main_custom_exceptions PROPERTIES + PASS_REGULAR_EXPRESSION "Custom error detected: Expected exception thrown." +) + add_subdirectory(cmake) add_subdirectory(device) diff --git a/testing/axes_metadata.cu b/testing/axes_metadata.cu index 1ea7dd61..cf4d93a6 100644 --- a/testing/axes_metadata.cu +++ b/testing/axes_metadata.cu @@ -129,13 +129,13 @@ void test_type_axes() fmt::memory_buffer buffer; for (const auto &axis : axes.get_axes()) { - fmt::format_to(buffer, "Axis: {}\n", axis->get_name()); + fmt::format_to(std::back_inserter(buffer), "Axis: {}\n", axis->get_name()); const auto num_values = axis->get_size(); for (std::size_t i = 0; i < num_values; ++i) { auto input_string = axis->get_input_string(i); auto description = axis->get_description(i); - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), " - {}{}\n", input_string, description.empty() ? "" @@ -159,7 +159,7 @@ Axis: Other const std::string test = fmt::to_string(buffer); const auto diff = std::mismatch(ref.cbegin(), ref.cend(), test.cbegin(), test.cend()); - const auto idx = diff.second - test.cbegin(); + const auto idx = static_cast(diff.second - test.cbegin()); ASSERT_MSG(test == ref, "Differs at character {}.\n" "Expected:\n\"{}\"\n\n" diff --git a/testing/benchmark.cu b/testing/benchmark.cu index 71ffe033..9581b12c 100644 --- a/testing/benchmark.cu +++ b/testing/benchmark.cu @@ -44,13 +44,13 @@ std::vector sort(std::vector &&vec) void no_op_generator(nvbench::state &state) { fmt::memory_buffer params; - fmt::format_to(params, "Params:"); + fmt::format_to(std::back_inserter(params), "Params:"); const auto &axis_values = state.get_axis_values(); for (const auto &name : sort(axis_values.get_names())) { std::visit( [¶ms, &name](const auto &value) { - fmt::format_to(params, " {}: {}", name, value); + fmt::format_to(std::back_inserter(params), " {}: {}", name, value); }, axis_values.get_value(name)); } @@ -101,13 +101,13 @@ void test_type_axes() const auto &axes = bench.get_axes().get_axes(); for (const auto &axis : axes) { - fmt::format_to(buffer, "Axis: {}\n", axis->get_name()); + fmt::format_to(std::back_inserter(buffer), "Axis: {}\n", axis->get_name()); const auto num_values = axis->get_size(); for (std::size_t i = 0; i < num_values; ++i) { auto input_string = axis->get_input_string(i); auto description = axis->get_description(i); - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), " - {}{}\n", input_string, description.empty() ? "" @@ -148,7 +148,7 @@ void test_type_configs() using Integer = nvbench::tl::get<0, Conf>; using Float = nvbench::tl::get<1, Conf>; using Other = nvbench::tl::get<2, Conf>; - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), "type_configs[{:2d}] = <{:>3}, {:>3}, {:>4}>\n", idx++, nvbench::type_strings::input_string(), diff --git a/testing/cmake/CMakeLists.txt b/testing/cmake/CMakeLists.txt index 2cb2f5fa..6932c00c 100644 --- a/testing/cmake/CMakeLists.txt +++ b/testing/cmake/CMakeLists.txt @@ -12,6 +12,7 @@ set(cmake_opts -D "CMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}" -D "CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" -D "CMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER}" + -D "CMAKE_CUDA_FLAGS=${CMAKE_CUDA_FLAGS}" -D "CMAKE_CUDA_ARCHITECTURES=${arches}" ) diff --git a/testing/create.cu b/testing/create.cu index d7d9586e..6ed7fff5 100644 --- a/testing/create.cu +++ b/testing/create.cu @@ -44,13 +44,13 @@ std::vector sort(std::vector &&vec) void no_op_generator(nvbench::state &state) { fmt::memory_buffer params; - fmt::format_to(params, "Params:"); + fmt::format_to(std::back_inserter(params), "Params:"); const auto &axis_values = state.get_axis_values(); for (const auto &name : sort(axis_values.get_names())) { std::visit( [¶ms, &name](const auto &value) { - fmt::format_to(params, " {}: {}", name, value); + fmt::format_to(std::back_inserter(params), " {}: {}", name, value); }, axis_values.get_value(name)); } @@ -109,7 +109,7 @@ std::string run_and_get_state_string(nvbench::benchmark_base &bench, for (const auto &state : states) { ASSERT(state.is_skipped()); - fmt::format_to(buffer, "{}\n", state.get_skip_reason()); + fmt::format_to(std::back_inserter(buffer), "{}\n", state.get_skip_reason()); } return fmt::to_string(buffer); } diff --git a/testing/criterion_manager.cu b/testing/criterion_manager.cu new file mode 100644 index 00000000..da0ddb0f --- /dev/null +++ b/testing/criterion_manager.cu @@ -0,0 +1,75 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "test_asserts.cuh" + +void test_standard_criteria_exist() +{ + ASSERT(nvbench::criterion_manager::get().get_criterion("stdrel").get_name() == "stdrel"); + ASSERT(nvbench::criterion_manager::get().get_criterion("entropy").get_name() == "entropy"); +} + +class custom_criterion : public nvbench::stopping_criterion_base +{ +public: + custom_criterion() + : nvbench::stopping_criterion_base("custom", nvbench::criterion_params{}) + {} + +protected: + virtual void do_initialize() override {} + virtual void do_add_measurement(nvbench::float64_t /* measurement */) override {} + virtual bool do_is_finished() override { return true; } +}; + +void test_no_duplicates_are_allowed() +{ + nvbench::criterion_manager& manager = nvbench::criterion_manager::get(); + bool exception_triggered = false; + + try { + [[maybe_unused]] nvbench::stopping_criterion_base& _ = manager.get_criterion("custom"); + } catch(...) { + exception_triggered = true; + } + ASSERT(exception_triggered); + + std::unique_ptr custom_ptr = std::make_unique(); + custom_criterion* custom_raw = custom_ptr.get(); + ASSERT(&manager.add(std::move(custom_ptr)) == custom_raw); + + nvbench::stopping_criterion_base& custom = nvbench::criterion_manager::get().get_criterion("custom"); + ASSERT(custom_raw == &custom); + + exception_triggered = false; + try { + manager.add(std::make_unique()); + } catch(...) { + exception_triggered = true; + } + ASSERT(exception_triggered); +} + +int main() +{ + test_standard_criteria_exist(); + test_no_duplicates_are_allowed(); +} diff --git a/testing/criterion_params.cu b/testing/criterion_params.cu new file mode 100644 index 00000000..4eceefaa --- /dev/null +++ b/testing/criterion_params.cu @@ -0,0 +1,63 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "test_asserts.cuh" + +void test_compat_parameters() +{ + nvbench::criterion_params params; + + ASSERT(params.has_value("max-noise")); + ASSERT(params.has_value("min-time")); + + ASSERT(params.get_float64("max-noise") == nvbench::detail::compat_max_noise()); + ASSERT(params.get_float64("min-time") == nvbench::detail::compat_min_time()); +} + +void test_compat_overwrite() +{ + nvbench::criterion_params params; + params.set_float64("max-noise", 40000.0); + params.set_float64("min-time", 42000.0); + + ASSERT(params.get_float64("max-noise") == 40000.0); + ASSERT(params.get_float64("min-time") == 42000.0); +} + +void test_overwrite() +{ + nvbench::criterion_params params; + ASSERT(!params.has_value("custom")); + + params.set_float64("custom", 42.0); + ASSERT(params.get_float64("custom") == 42.0); + + params.set_float64("custom", 4.2); + ASSERT(params.get_float64("custom") == 4.2); +} + +int main() +{ + test_compat_parameters(); + test_compat_overwrite(); + test_overwrite(); +} + diff --git a/testing/custom_main_custom_args.cu b/testing/custom_main_custom_args.cu new file mode 100644 index 00000000..f7e331e3 --- /dev/null +++ b/testing/custom_main_custom_args.cu @@ -0,0 +1,132 @@ +/* + * Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "nvbench/cuda_call.cuh" + +/****************************************************************************** + * Install custom parser. + * sSee for more details. + ******************************************************************************/ + +// +// Step 1: Define a custom argument handler that accepts a vector of strings. +// - This handler should modify the vector in place to remove any custom +// arguments it handles. NVbench will then parse the remaining arguments. +// - The handler should also update any application state needed to handle +// the custom arguments. +// + +// User code to handle a specific argument: +void handle_my_custom_arg(); + +// NVBench hook for modiifying the command line arguments before parsing: +void custom_arg_handler(std::vector &args) +{ + // Handle and remove "--my-custom-arg" + if (auto it = std::find(args.begin(), args.end(), "--my-custom-arg"); it != args.end()) + { + handle_my_custom_arg(); + args.erase(it); + } +} + +// +// Step 2: Install the custom argument handler. +// - This is done by defining a macro that invokes the custom argument handler. +// + +// Install the custom argument handler: +// Either define this before any NVBench headers are included, or undefine and redefine: +#undef NVBENCH_MAIN_CUSTOM_ARGS_HANDLER +#define NVBENCH_MAIN_CUSTOM_ARGS_HANDLER(args) custom_arg_handler(args) + +// Step 3: Define `main` +// +// After installing the custom argument handler, define the main function using: +// +// ``` +// NVBENCH_MAIN +// ``` +// +// Here, this is done at the end of this file. + +/****************************************************************************** + * Unit test verification: + ******************************************************************************/ + +// Track whether the args are found / handled. +bool h_custom_arg_found = false; +bool h_handled_on_device = false; +__device__ bool d_custom_arg_found = false; +__device__ bool d_handled_on_device = false; + +// Copy host values to device: +void copy_host_state_to_device() +{ + NVBENCH_CUDA_CALL(cudaMemcpyToSymbol(d_custom_arg_found, &h_custom_arg_found, sizeof(bool))); + NVBENCH_CUDA_CALL(cudaMemcpyToSymbol(d_handled_on_device, &h_handled_on_device, sizeof(bool))); +} + +// Copy device values to host: +void copy_device_state_to_host() +{ + NVBENCH_CUDA_CALL(cudaMemcpyFromSymbol(&h_custom_arg_found, d_custom_arg_found, sizeof(bool))); + NVBENCH_CUDA_CALL(cudaMemcpyFromSymbol(&h_handled_on_device, d_handled_on_device, sizeof(bool))); +} + +void handle_my_custom_arg() +{ + h_custom_arg_found = true; + copy_host_state_to_device(); +} + +void verify() +{ + copy_device_state_to_host(); + if (!h_custom_arg_found) + { + throw std::runtime_error("Custom argument not detected."); + } + if (!h_handled_on_device) + { + throw std::runtime_error("Custom argument not handled on device."); + } +} + +// Install a verification check to ensure the custom argument was handled. +// Use the `PRE` finalize hook to ensure we check device state before resetting the context. +#undef NVBENCH_MAIN_FINALIZE_CUSTOM_PRE +#define NVBENCH_MAIN_FINALIZE_CUSTOM_PRE() verify() + +// Simple kernel/benchmark to make sure that the handler can successfully modify CUDA state: +__global__ void kernel() +{ + if (d_custom_arg_found) + { + d_handled_on_device = true; + } +} +void bench(nvbench::state &state) +{ + state.exec([](nvbench::launch &) { kernel<<<1, 1>>>(); }); +} +NVBENCH_BENCH(bench); + +// Define the customized main function: +NVBENCH_MAIN diff --git a/testing/custom_main_custom_exceptions.cu b/testing/custom_main_custom_exceptions.cu new file mode 100644 index 00000000..b1f9b9c2 --- /dev/null +++ b/testing/custom_main_custom_exceptions.cu @@ -0,0 +1,64 @@ +/* + * Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +/****************************************************************************** + * Install exception handler around the NVBench main body. This is used + * to print helpful information when a user exception is thrown before exiting. + * + * Note that this will **NOT** be used when a benchmark throws an exception. + * That will fail the benchmark and note the exception, and continue + * execution. + * + * This is used to catch exceptions in user extensions of NVBench, things like + * customized initialization, command line parsing, finalization, etc. See + * for more details. + ******************************************************************************/ + +struct user_exception : public std::runtime_error +{ + user_exception() + : std::runtime_error("Expected exception thrown.") + {} +}; + +// User code to handle user exception: +void handle_my_exception(user_exception &e) +{ + std::cerr << "Custom error detected: " << e.what() << std::endl; + std::exit(1); +} + +// Install the exception handler around the NVBench main body. +// NVBench will have sensible defaults for common exceptions following this if no terminating catch +// block is defined. +// Either define this before any NVBench headers are included, or undefine and redefine. +#undef NVBENCH_MAIN_CATCH_EXCEPTIONS_CUSTOM +#define NVBENCH_MAIN_CATCH_EXCEPTIONS_CUSTOM \ + catch (user_exception & e) { handle_my_exception(e); } + +// For testing purposes, install a argument parser that throws: +void really_robust_argument_parser(std::vector &) { throw user_exception(); } +#undef NVBENCH_MAIN_CUSTOM_ARGS_HANDLER +#define NVBENCH_MAIN_CUSTOM_ARGS_HANDLER(args) really_robust_argument_parser(args); + +// Define the customized main function: +NVBENCH_MAIN diff --git a/testing/custom_main_global_state_raii.cu b/testing/custom_main_global_state_raii.cu new file mode 100644 index 00000000..e3584ab6 --- /dev/null +++ b/testing/custom_main_global_state_raii.cu @@ -0,0 +1,121 @@ +/* + * Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include + +/****************************************************************************** + * Test having global state that is initialized and finalized via RAII. + *****************************************************************************/ + +struct raii +{ + const char m_ref_data[6]; + char *m_data; + bool m_cuda; + + const char *m_outer_data; + bool m_outer_cuda; + + explicit raii(bool cuda, char *outer_data = nullptr, bool outer_cuda = false) + : m_ref_data{'a', 'b', 'c', '1', '2', '3'} + , m_data(nullptr) + , m_cuda(cuda) + , m_outer_data(outer_data) + , m_outer_cuda(outer_cuda) + { + if (m_cuda) + { + printf("(%p) RAII test: allocating device memory\n", this); + NVBENCH_CUDA_CALL(cudaMalloc(&m_data, 6)); + NVBENCH_CUDA_CALL(cudaMemcpy(m_data, m_ref_data, 6, cudaMemcpyHostToDevice)); + } + else + { + printf("(%p) RAII test: allocating host memory\n", this); + m_data = new char[6]; + std::copy(m_ref_data, m_ref_data + 6, m_data); + } + } + + ~raii() + { + this->verify(); + if (m_cuda) + { + printf("(%p) RAII test: invalidating device memory\n", this); + NVBENCH_CUDA_CALL(cudaMemset(m_data, 0, 6)); + printf("(%p) RAII test: freeing device memory\n", this); + NVBENCH_CUDA_CALL(cudaFree(m_data)); + } + else + { + printf("(%p) RAII test: invalidating host memory\n", this); + std::fill(m_data, m_data + 6, '\0'); + printf("(%p) RAII test: freeing host memory\n", this); + delete[] m_data; + } + } + + void verify() noexcept + { + printf("(%p) RAII test: verifying instance state\n", this); + this->verify(m_cuda, m_data); + if (m_outer_data) + { + printf("(%p) RAII test: verifying outer state\n", this); + this->verify(m_outer_cuda, m_outer_data); + } + } + + void verify(bool cuda, const char *data) noexcept + { + if (cuda) + { + char test_data[6]; + NVBENCH_CUDA_CALL(cudaMemcpy(test_data, data, 6, cudaMemcpyDeviceToHost)); + if (strncmp(test_data, m_ref_data, 6) != 0) + { + printf("(%p) RAII test failed: device data mismatch\n", this); + std::exit(1); + } + } + else + { + if (strncmp(data, m_ref_data, 6) != 0) + { + printf("(%p) RAII test failed: host data mismatch\n", this); + std::exit(1); + } + } + } +}; + +// These will be destroyed in the opposite order in which they are created: + +#undef NVBENCH_MAIN_INITIALIZE_CUSTOM_PRE +#define NVBENCH_MAIN_INITIALIZE_CUSTOM_PRE(argc, argv) raii raii_outer(false); + +#undef NVBENCH_MAIN_INITIALIZE_CUSTOM_POST +#define NVBENCH_MAIN_INITIALIZE_CUSTOM_POST(argc, argv) \ + [[maybe_unused]] raii raii_inner(true, raii_outer.m_data, raii_outer.m_cuda); + +NVBENCH_MAIN diff --git a/testing/entropy_criterion.cu b/testing/entropy_criterion.cu new file mode 100644 index 00000000..df489c96 --- /dev/null +++ b/testing/entropy_criterion.cu @@ -0,0 +1,91 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include "test_asserts.cuh" + +#include +#include +#include + +void test_const() +{ + nvbench::criterion_params params; + nvbench::detail::entropy_criterion criterion; + + criterion.initialize(params); + for (int i = 0; i < 6; i++) + { // nvbench wants at least 5 to compute the standard deviation + criterion.add_measurement(42.0); + } + ASSERT(criterion.is_finished()); +} + +void produce_entropy_arch(nvbench::detail::entropy_criterion &criterion) +{ + /* + * This pattern is designed to simulate the entropy: + * + * 0.0, 1.0, 1.5, 2.0, 2.3, 2.5 <---- no unexpected measurement after this point + * 2.5, 2.4, 2.2, 2.1, 2.0, 1.9 <-+ + * 1.8, 1.7, 1.6, 1.6, 1.5, 1.4 | + * 1.4, 1.3, 1.3, 1.3, 1.2, 1.2 | + * 1.1, 1.1, 1.1, 1.0, 1.0, 1.0 +-- entropy only decreases after 5-th sample, + * 1.0, 0.9, 0.9, 0.9, 0.9, 0.9 | so the slope should be negative + * 0.8, 0.8, 0.8, 0.8, 0.8, 0.8 | + * 0.7, 0.7, 0.7, 0.7, 0.7, 0.7 <-+ + */ + for (nvbench::float64_t x = 0.0; x < 50.0; x += 1.0) + { + criterion.add_measurement(x > 5.0 ? 5.0 : x); + } +} + +void test_entropy_arch() +{ + nvbench::detail::entropy_criterion criterion; + + // The R2 should be around 0.5 + // The angle should be around -1.83 + nvbench::criterion_params params; + params.set_float64("min-r2", 0.3); + params.set_float64("max-angle", -1.0); + criterion.initialize(params); + produce_entropy_arch(criterion); + ASSERT(criterion.is_finished()); + + params.set_float64("min-r2", 0.7); + criterion.initialize(params); + produce_entropy_arch(criterion); + ASSERT(!criterion.is_finished()); + + params.set_float64("min-r2", 0.3); + params.set_float64("max-angle", -2.0); + criterion.initialize(params); + produce_entropy_arch(criterion); + ASSERT(!criterion.is_finished()); +} + +int main() +{ + test_const(); + test_entropy_arch(); +} diff --git a/testing/enum_type_list.cu b/testing/enum_type_list.cu index 05b26052..88535ba3 100644 --- a/testing/enum_type_list.cu +++ b/testing/enum_type_list.cu @@ -24,6 +24,11 @@ #include +// If using gcc version < 7, disable some tests to WAR a compiler bug. See NVIDIA/nvbench#39. +#if defined(__GNUC__) && __GNUC__ == 7 +#define USING_GCC_7 +#endif + enum class scoped_enum { val_1, @@ -109,9 +114,11 @@ void test_int() void test_scoped_enum() { +#ifndef USING_GCC_7 ASSERT(( std::is_same_v, nvbench::type_list>>)); +#endif ASSERT(( std::is_same_v, nvbench::type_list>>)); @@ -132,6 +140,7 @@ void test_unscoped_enum() nvbench::type_list, nvbench::enum_type, nvbench::enum_type>>)); +#endif } void test_scoped_enum_type_strings() diff --git a/testing/option_parser.cu b/testing/option_parser.cu index 9d7e6a9c..167e833f 100644 --- a/testing/option_parser.cu +++ b/testing/option_parser.cu @@ -57,8 +57,8 @@ states_to_string(const std::vector &states) std::string table_format = "| {:^5} | {:^10} | {:^4} | {:^4} | {:^4} " "| {:^4} | {:^6} | {:^8} |\n"; - fmt::format_to(buffer, "\n"); - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), "\n"); + fmt::format_to(std::back_inserter(buffer), table_format, "State", "TypeConfig", @@ -72,7 +72,7 @@ states_to_string(const std::vector &states) std::size_t config = 0; for (const auto &state : states) { - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), table_format, config++, state.get_type_config_index(), @@ -1229,6 +1229,27 @@ void test_timeout() ASSERT(std::abs(states[0].get_timeout() - 12345e2) < 1.); } +void test_stopping_criterion() +{ + nvbench::option_parser parser; + parser.parse( + {"--benchmark", "DummyBench", + "--stopping-criterion", "entropy", + "--max-angle", "0.42", + "--min-r2", "0.6"}); + const auto& states = parser_to_states(parser); + + ASSERT(states.size() == 1); + ASSERT(states[0].get_stopping_criterion() == "entropy"); + + const nvbench::criterion_params &criterion_params = states[0].get_criterion_params(); + ASSERT(criterion_params.has_value("max-angle")); + ASSERT(criterion_params.has_value("min-r2")); + + ASSERT(criterion_params.get_float64("max-angle") == 0.42); + ASSERT(criterion_params.get_float64("min-r2") == 0.6); +} + int main() try { @@ -1265,6 +1286,8 @@ try test_skip_time(); test_timeout(); + test_stopping_criterion(); + return 0; } catch (std::exception &err) diff --git a/testing/reset_error.cu b/testing/reset_error.cu new file mode 100644 index 00000000..8fece930 --- /dev/null +++ b/testing/reset_error.cu @@ -0,0 +1,30 @@ +#include + +#include "test_asserts.cuh" + + +namespace +{ + __global__ void multiply5(const int32_t* __restrict__ a, int32_t* __restrict__ b) + { + const auto id = blockIdx.x * blockDim.x + threadIdx.x; + b[id] = 5 * a[id]; + } +} + +int main() +{ + multiply5<<<256, 256>>>(nullptr, nullptr); + + try + { + NVBENCH_CUDA_CALL(cudaStreamSynchronize(0)); + ASSERT(false); + } + catch (const std::runtime_error &) + { + ASSERT(cudaGetLastError() == cudaError_t::cudaSuccess); + } + + return 0; +} diff --git a/testing/ring_buffer.cu b/testing/ring_buffer.cu index 4e138056..5af53431 100644 --- a/testing/ring_buffer.cu +++ b/testing/ring_buffer.cu @@ -27,7 +27,7 @@ template bool equal(const nvbench::detail::ring_buffer &buffer, const std::vector &reference) { - return std::equal(buffer.cbegin(), buffer.cend(), reference.cbegin()); + return std::equal(buffer.begin(), buffer.end(), reference.begin()); } int main() @@ -62,12 +62,12 @@ try ASSERT(avg.size() == 3); ASSERT(avg.capacity() == 3); ASSERT_MSG(avg.back() == 5, " (got {})", avg.back()); - ASSERT(equal(avg, {5, 2, -15})); + ASSERT(equal(avg, {2, -15, 5})); avg.push_back(0); ASSERT(avg.size() == 3); ASSERT(avg.capacity() == 3); - ASSERT(equal(avg, {5, 0, -15})); + ASSERT(equal(avg, {-15, 5, 0})); ASSERT_MSG(avg.back() == 0, " (got {})", avg.back()); avg.push_back(128); diff --git a/testing/runner.cu b/testing/runner.cu index 157e4548..6335d276 100644 --- a/testing/runner.cu +++ b/testing/runner.cu @@ -43,13 +43,13 @@ std::vector sort(std::vector &&vec) void no_op_generator(nvbench::state &state) { fmt::memory_buffer params; - fmt::format_to(params, "Params:"); + fmt::format_to(std::back_inserter(params), "Params:"); const auto &axis_values = state.get_axis_values(); for (const auto &name : sort(axis_values.get_names())) { std::visit( [¶ms, &name](const auto &value) { - fmt::format_to(params, " {}: {}", name, value); + fmt::format_to(std::back_inserter(params), " {}: {}", name, value); }, axis_values.get_value(name)); } @@ -124,7 +124,7 @@ void test_non_types() for (const auto &state : bench.get_states()) { ASSERT(state.is_skipped() == true); - fmt::format_to(buffer, "{}\n", state.get_skip_reason()); + fmt::format_to(std::back_inserter(buffer), "{}\n", state.get_skip_reason()); } const std::string ref = R"expected(Params: Float: 11 Int: 1 String: One @@ -184,7 +184,7 @@ void test_types() for (const auto &state : bench.get_states()) { ASSERT(state.is_skipped() == true); - fmt::format_to(buffer, "{}\n", state.get_skip_reason()); + fmt::format_to(std::back_inserter(buffer), "{}\n", state.get_skip_reason()); } const std::string ref = R"expected(Params: FloatT: F32 IntT: I32 MiscT: bool @@ -228,7 +228,7 @@ void test_both() for (const auto &state : bench.get_states()) { ASSERT(state.is_skipped() == true); - fmt::format_to(buffer, "{}\n", state.get_skip_reason()); + fmt::format_to(std::back_inserter(buffer), "{}\n", state.get_skip_reason()); } const std::string ref = diff --git a/testing/state_generator.cu b/testing/state_generator.cu index cb584be5..f75be021 100644 --- a/testing/state_generator.cu +++ b/testing/state_generator.cu @@ -89,17 +89,17 @@ void test_basic() for (sg.init(); sg.iter_valid(); sg.next()) { line.clear(); - fmt::format_to(line, "| {:^2}", line_num++); + fmt::format_to(std::back_inserter(line), "| {:^2}", line_num++); for (auto &axis_index : sg.get_current_indices()) { ASSERT(axis_index.type == nvbench::axis_type::string); - fmt::format_to(line, + fmt::format_to(std::back_inserter(line), " | {}: {}/{}", axis_index.axis, axis_index.index, axis_index.size); } - fmt::format_to(buffer, "{} |\n", fmt::to_string(line)); + fmt::format_to(std::back_inserter(buffer), "{} |\n", fmt::to_string(line)); } const std::string ref = @@ -166,8 +166,8 @@ void test_create() const std::string table_format = "| {:^5} | {:^10} | {:^7} | {:^7} | {:^9} | {:^9} |\n"; - fmt::format_to(buffer, "\n"); - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), "\n"); + fmt::format_to(std::back_inserter(buffer), table_format, "State", "TypeConfig", @@ -179,7 +179,7 @@ void test_create() std::size_t config = 0; for (const auto &state : states) { - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), table_format, config++, state.get_type_config_index(), @@ -258,8 +258,8 @@ void test_create_with_types() std::string table_format = "| {:^5} | {:^10} | {:^6} | {:^4} | {:^4} | {:^7} " "| {:^7} | {:^9} | {:^9} |\n"; - fmt::format_to(buffer, "\n"); - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), "\n"); + fmt::format_to(std::back_inserter(buffer), table_format, "State", "TypeConfig", @@ -274,7 +274,7 @@ void test_create_with_types() std::size_t config = 0; for (const auto &state : states) { - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), table_format, config++, state.get_type_config_index(), @@ -607,8 +607,8 @@ void test_create_with_masked_types() std::string table_format = "| {:^5} | {:^10} | {:^6} | {:^4} | {:^4} | {:^7} " "| {:^7} | {:^9} | {:^9} |\n"; - fmt::format_to(buffer, "\n"); - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), "\n"); + fmt::format_to(std::back_inserter(buffer), table_format, "State", "TypeConfig", @@ -623,7 +623,7 @@ void test_create_with_masked_types() std::size_t config = 0; for (const auto &state : states) { - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), table_format, config++, state.get_type_config_index(), @@ -737,13 +737,13 @@ void test_devices() fmt::memory_buffer buffer; const std::string table_format = "| {:^5} | {:^6} | {:^5} | {:^3} |\n"; - fmt::format_to(buffer, "\n"); - fmt::format_to(buffer, table_format, "State", "Device", "S", "I"); + fmt::format_to(std::back_inserter(buffer), "\n"); + fmt::format_to(std::back_inserter(buffer), table_format, "State", "Device", "S", "I"); std::size_t config = 0; for (const auto &state : states) { - fmt::format_to(buffer, + fmt::format_to(std::back_inserter(buffer), table_format, config++, state.get_device()->get_id(), diff --git a/testing/statistics.cu b/testing/statistics.cu new file mode 100644 index 00000000..a67a0448 --- /dev/null +++ b/testing/statistics.cu @@ -0,0 +1,129 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "test_asserts.cuh" + +#include +#include + +namespace statistics = nvbench::detail::statistics; + +void test_mean() +{ + { + std::vector data{1.0, 2.0, 3.0, 4.0, 5.0}; + const nvbench::float64_t actual = statistics::compute_mean(std::begin(data), std::end(data)); + const nvbench::float64_t expected = 3.0; + ASSERT(std::abs(actual - expected) < 0.001); + } + + { + std::vector data; + const bool finite = std::isfinite(statistics::compute_mean(std::begin(data), std::end(data))); + ASSERT(!finite); + } +} + +void test_std() +{ + std::vector data{1.0, 2.0, 3.0, 4.0, 5.0}; + const nvbench::float64_t mean = 3.0; + const nvbench::float64_t actual = statistics::standard_deviation(std::begin(data), std::end(data), mean); + const nvbench::float64_t expected = 1.581; + ASSERT(std::abs(actual - expected) < 0.001); +} + +void test_lin_regression() +{ + { + std::vector ys{1.0, 2.0, 3.0, 4.0, 5.0}; + auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys)); + ASSERT(slope == 1.0); + ASSERT(intercept == 1.0); + } + { + std::vector ys{42.0, 42.0, 42.0}; + auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys)); + ASSERT(slope == 0.0); + ASSERT(intercept == 42.0); + } + { + std::vector ys{8.0, 4.0, 0.0}; + auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys)); + ASSERT(slope == -4.0); + ASSERT(intercept == 8.0); + } +} + +void test_r2() +{ + { + std::vector ys{1.0, 2.0, 3.0, 4.0, 5.0}; + auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys)); + const nvbench::float64_t actual = statistics::compute_r2(std::begin(ys), std::end(ys), slope, intercept); + const nvbench::float64_t expected = 1.0; + ASSERT(std::abs(actual - expected) < 0.001); + } + { + std::vector signal{1.0, 2.0, 3.0, 4.0, 5.0}; + std::vector noise{-1.0, 1.0, -1.0, 1.0, -1.0}; + std::vector ys(signal.size()); + + std::transform(std::begin(signal), + std::end(signal), + std::begin(noise), + std::begin(ys), + std::plus()); + + auto [slope, intercept] = statistics::compute_linear_regression(std::begin(ys), std::end(ys)); + const nvbench::float64_t expected = 0.675; + const nvbench::float64_t actual = statistics::compute_r2(std::begin(ys), std::end(ys), slope, intercept); + ASSERT(std::abs(actual - expected) < 0.001); + } +} + +void test_slope_conversion() +{ + { + const nvbench::float64_t actual = statistics::slope2deg(0.0); + const nvbench::float64_t expected = 0.0; + ASSERT(std::abs(actual - expected) < 0.001); + } + { + const nvbench::float64_t actual = statistics::slope2deg(1.0); + const nvbench::float64_t expected = 45.0; + ASSERT(std::abs(actual - expected) < 0.001); + } + { + const nvbench::float64_t actual = statistics::slope2deg(5.0); + const nvbench::float64_t expected = 78.69; + ASSERT(std::abs(actual - expected) < 0.001); + } +} + +int main() +{ + test_mean(); + test_std(); + test_lin_regression(); + test_r2(); + test_slope_conversion(); +} diff --git a/testing/stdrel_criterion.cu b/testing/stdrel_criterion.cu new file mode 100644 index 00000000..f0affea0 --- /dev/null +++ b/testing/stdrel_criterion.cu @@ -0,0 +1,84 @@ +/* + * Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include "test_asserts.cuh" + +#include +#include +#include + +void test_const() +{ + nvbench::criterion_params params; + nvbench::detail::stdrel_criterion criterion; + + criterion.initialize(params); + for (int i = 0; i < 5; i++) + { // nvbench wants at least 5 to compute the standard deviation + criterion.add_measurement(42.0); + } + ASSERT(criterion.is_finished()); +} + +std::vector generate(double mean, double rel_std_dev, int size) +{ + static std::mt19937::result_type seed = 0; + std::mt19937 gen(seed++); + std::vector v(static_cast(size)); + std::normal_distribution dist(mean, mean * rel_std_dev); + std::generate(v.begin(), v.end(), [&]{ return dist(gen); }); + return v; +} + +void test_stdrel() +{ + const nvbench::int64_t size = 10; + const nvbench::float64_t mean = 42.0; + const nvbench::float64_t max_noise = 0.1; + + nvbench::criterion_params params; + params.set_float64("max-noise", max_noise); + + nvbench::detail::stdrel_criterion criterion; + criterion.initialize(params); + + for (nvbench::float64_t measurement: generate(mean, max_noise / 2, size)) + { + criterion.add_measurement(measurement); + } + ASSERT(criterion.is_finished()); + + params.set_float64("max-noise", max_noise); + criterion.initialize(params); + + for (nvbench::float64_t measurement: generate(mean, max_noise * 2, size)) + { + criterion.add_measurement(measurement); + } + ASSERT(!criterion.is_finished()); +} + +int main() +{ + test_const(); + test_stdrel(); +}